def main(args):
    """Main train and evaluation function.

    Parameters
    ----------
    args: argparse.Namespace
        Arguments
    """
    formatter = logging.Formatter('%(asctime)s %(levelname)s - %(funcName)s: %(message)s',
                                  "%H:%M:%S")
    logger = logging.getLogger(__name__)
    logger.setLevel(args.log_level.upper())
    stream = logging.StreamHandler()
    stream.setLevel(args.log_level.upper())
    stream.setFormatter(formatter)
    logger.addHandler(stream)

    set_seed(args.seed)
    device = get_device(is_gpu=not args.no_cuda)
    exp_dir = os.path.join(RES_DIR, args.name)
    feature_dir = os.path.join(exp_dir, 'training_features')
    logger.info("Root directory for saving and loading experiments: {}".format(exp_dir))

    if not args.is_eval_only:
        create_safe_directory(feature_dir, logger=logger)

        # Use a single pass (1 epoch, batch size 1) since we only extract features
        args.epochs = 1
        args.batch_size = 1

        # PREPARES DATA
        data_loader = get_dataloaders(args.dataset,
                                       batch_size=args.batch_size,
                                       logger=logger, test=False)
        logger.info("Train {} with {} samples".format(args.dataset, len(data_loader.dataset)))

        # PREPARES MODEL
        args.img_size = get_img_size(args.dataset)  # stores for metadata
        model = load_model(exp_dir, filename='model.pt')
        logger.info('Num parameters in model: {}'.format(get_n_param(model)))

        # EXTRACT FEATURES
        # (an illustrative sketch of such an extractor appears right after this snippet)
        model = model.to(device)  # move the model to the target device
        fe = FeatureExtractor(model,
                              save_dir=exp_dir,
                              is_progress_bar=not args.no_progress_bar)
        fe(data_loader,
           epochs=args.epochs,
           checkpoint_every=args.checkpoint_every,
           feature_dir=feature_dir)

        # SAVE MODEL AND EXPERIMENT INFORMATION
        # save_model(trainer.model, exp_dir, metadata=vars(args))
        print('Done.')
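FeatureExtractor is a project-specific helper, so the snippet above cannot be run standalone. The following is only a minimal sketch, under the assumption that the extractor runs the model over the loader without gradients and periodically dumps feature tensors into feature_dir; the class body and the (x, _) batch format are illustrative, not the project's actual implementation.

import os
import torch

class FeatureExtractor:
    """Illustrative stand-in: run a model over a loader and save features."""

    def __init__(self, model, save_dir, is_progress_bar=True):
        self.model = model
        self.save_dir = save_dir
        self.is_progress_bar = is_progress_bar

    @torch.no_grad()
    def __call__(self, data_loader, epochs=1, checkpoint_every=10, feature_dir='.'):
        self.model.eval()
        device = next(self.model.parameters()).device
        for _ in range(epochs):
            for i, (x, _unused) in enumerate(data_loader):
                feats = self.model(x.to(device))   # forward pass only, no gradients
                if i % checkpoint_every == 0:      # dump features every N batches
                    torch.save(feats.cpu(),
                               os.path.join(feature_dir, 'features_{}.pt'.format(i)))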
Example #2
def main(args):

    set_seed(args.seed)
    device = torch.device(
        'cuda:{}'.format(args.gpu) if torch.cuda.is_available() else 'cpu')
    exp_dir = os.path.join(RES_DIR, args.name)
    print("save and load experiments at : {}".format(exp_dir))

    if not args.is_eval_only:  #train

        create_directory(exp_dir)

        # PREPARES TRAINING DATA
        train_loader = get_dataloaders(args.dataset,
                                       batch_size=args.batch_size)

        ##############
        # PREPARES MODEL
        args.img_size = get_img_size(args.dataset)  # stores for metadata
        cs = [1, 64, 128, 1024]
        model = VLAE(args, args.latent_dim, cs)

        # TRAINS
        optimizer = optim.Adam(model.parameters(), lr=args.lr)

        model = model.to(device)  # make sure trainer and viz on same device

        gif_visualizer = GifTraversalsTraining(model, args.dataset, exp_dir)

        reg_coeff = [args.reg_coeff0, args.reg_coeff1, args.reg_coeff2]

        trainer = Trainer(model,
                          optimizer,
                          reg_coeff,
                          device=device,
                          save_dir=exp_dir,
                          is_progress_bar=not args.no_progress_bar,
                          gif_visualizer=gif_visualizer)

        trainer(args,
                train_loader,
                epochs=args.epochs,
                checkpoint_every=args.checkpoint_every)

        # SAVE MODEL AND EXPERIMENT INFORMATION

        save_model(trainer.model, exp_dir, metadata=vars(args))
        print("Model has been saved")
def main(args):
    """Main train and evaluation function.

    Parameters
    ----------
    args: argparse.Namespace
        Arguments
    """
    formatter = logging.Formatter('%(asctime)s %(levelname)s - %(funcName)s: %(message)s',
                                  "%H:%M:%S")
    logger = logging.getLogger(__name__)
    logger.setLevel(args.log_level.upper())
    stream = logging.StreamHandler()
    stream.setLevel(args.log_level.upper())
    stream.setFormatter(formatter)
    logger.addHandler(stream)

    set_seed(args.seed)
    device = get_device(is_gpu=not args.no_cuda)
    exp_dir = os.path.join(RES_DIR, args.name)
    logger.info("Root directory for saving and loading experiments: {}".format(exp_dir))

 
    # Initialize a new sweep
    # Arguments:
    #     - sweep_config: the sweep config dictionary defined above
    #     - entity: the username for the sweep
    #     - project: the project name for the sweep
    # (the full sweep-initialization pattern is sketched after this snippet)

    config = wandb.config
    with wandb.init(name="sweep-reg_anneal-seed",
                    notes='This is a test run',
                    tags=['btcvae', 'dsprites'],
                    entity='neonkitchen',
                    project='sweep',
                    config=config):
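For reference, here is a minimal sketch of the sweep-initialization pattern the comment block describes, using the public W&B API (wandb.sweep / wandb.agent). The sweep_config dictionary, the train() body, and the trial count are placeholders for illustration, not values from this project.

import wandb

# Hypothetical sweep configuration; in the source it is "defined above".
sweep_config = {
    'method': 'random',
    'metric': {'name': 'loss', 'goal': 'minimize'},
    'parameters': {'seed': {'values': [1, 2, 3]}},
}

def train():
    # Inside an agent run, wandb.init() populates wandb.config with the sweep values.
    with wandb.init(entity='neonkitchen', project='sweep', tags=['btcvae', 'dsprites']):
        config = wandb.config
        print('running with seed', config.seed)

# Register the sweep, then launch an agent that calls train() once per trial.
sweep_id = wandb.sweep(sweep_config, entity='neonkitchen', project='sweep')
wandb.agent(sweep_id, function=train, count=3)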
Example #4
def main():
    # Initialise
    config = parse_args(sys.argv[1:])
    device = get_device()
    set_seed(config.seed)

    # Data
    arrs = np.load(config.data, allow_pickle=True).item()
    train_data = EHR(arrs['X_train'], arrs['Y_train'])
    valid_data = EHR(arrs['X_valid'], arrs['Y_valid'])
    train_loader = DataLoader(train_data,
                              batch_size=config.batch_size,
                              shuffle=True,
                              pin_memory=True)
    valid_loader = DataLoader(valid_data,
                              batch_size=128,
                              shuffle=False,
                              pin_memory=True)

    # Model
    n_tokens = int(arrs['X_train'].max()) + 1  # +1 so index 0 can serve as padding (see the note after this example)
    model = Net(n_tokens,
                config.emb_dim,
                config.rnn_dim,
                variational=config.variational,
                layer_norm=config.layer_norm).to(device)
    optimizer = optim.Adam(model.parameters(),
                           lr=config.learning_rate,
                           weight_decay=config.weight_decay)

    # Train
    for epoch in range(config.epochs):
        t_loss = train_epoch(model, device, train_loader, optimizer)
        v_loss, v_auroc, v_auprc = test(model, device, valid_loader)

    # Save into the per-experiment directory created for this run
    model_dir = os.path.join('results', config.name)
    new_model_dir(model_dir)
    torch.save(model.state_dict(), os.path.join(model_dir, "model.h5"))
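The +1 on n_tokens reserves index 0 as a padding token. Net is project code, but the underlying PyTorch mechanism is simply an embedding table with one extra row; a tiny self-contained illustration (all sizes are made up):

import torch
import torch.nn as nn

n_tokens = 5                                    # token ids are 1..4; 0 is reserved for padding
emb = nn.Embedding(n_tokens, 8, padding_idx=0)  # the pad row stays at zero and gets no gradient
ids = torch.tensor([[3, 1, 0, 0]])              # a right-padded sequence
vectors = emb(ids)                              # shape (1, 4, 8); the last two rows are all zeros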
Example #5
def main(args):
    """Main train and evaluation function.

    Parameters
    ----------
    args: argparse.Namespace
        Arguments
    """
    formatter = logging.Formatter(
        '%(asctime)s %(levelname)s - %(funcName)s: %(message)s', "%H:%M:%S")
    logger = logging.getLogger(__name__)
    logger.setLevel(args.log_level.upper())
    stream = logging.StreamHandler()
    stream.setLevel(args.log_level.upper())
    stream.setFormatter(formatter)
    logger.addHandler(stream)

    set_seed(args.seed)
    device = get_device(is_gpu=not args.no_cuda)
    exp_dir = os.path.join(RES_DIR, args.name)
    logger.info("Root directory for saving and loading experiments: {}".format(
        exp_dir))

    if not args.is_eval_only:

        create_safe_directory(exp_dir, logger=logger)

        if args.loss == "factor":
            logger.info(
                "FactorVae needs 2 batches per iteration. To replicate this "
                "behavior while staying consistent, we double the batch size "
                "and the number of epochs.")
            # (how the doubled batch is split is sketched after this example)
            args.batch_size *= 2
            args.epochs *= 2

        # PREPARES DATA
        train_loader = get_dataloaders(args.dataset,
                                       batch_size=args.batch_size,
                                       logger=logger)
        logger.info("Train {} with {} samples".format(
            args.dataset, len(train_loader.dataset)))

        # PREPARES MODEL
        args.img_size = get_img_size(args.dataset)  # stores for metadata
        model = init_specific_model(args.model_type, args.img_size,
                                    args.latent_dim)
        logger.info('Num parameters in model: {}'.format(get_n_param(model)))

        # TRAINS
        optimizer = optim.Adam(model.parameters(), lr=args.lr)

        model = model.to(device)  # make sure trainer and viz on same device
        gif_visualizer = GifTraversalsTraining(model, args.dataset, exp_dir)
        loss_f = get_loss_f(args.loss,
                            n_data=len(train_loader.dataset),
                            device=device,
                            **vars(args))
        trainer = Trainer(model,
                          optimizer,
                          loss_f,
                          device=device,
                          logger=logger,
                          save_dir=exp_dir,
                          is_progress_bar=not args.no_progress_bar,
                          gif_visualizer=gif_visualizer)
        trainer(
            train_loader,
            epochs=args.epochs,
            checkpoint_every=args.checkpoint_every,
        )

        # SAVE MODEL AND EXPERIMENT INFORMATION
        save_model(trainer.model, exp_dir, metadata=vars(args))

    if args.is_metrics or not args.no_test:
        model = load_model(exp_dir, is_gpu=not args.no_cuda)
        metadata = load_metadata(exp_dir)
        # TODO: currently uses the train dataset

        test_loader = get_dataloaders(metadata["dataset"],
                                      batch_size=args.eval_batchsize,
                                      shuffle=False,
                                      logger=logger)
        loss_f = get_loss_f(args.loss,
                            n_data=len(test_loader.dataset),
                            device=device,
                            **vars(args))

        use_wandb = False
        if use_wandb:
            loss = args.loss
            wandb.init(project="atmlbetavae", config={"VAE_loss": args.loss})
            if loss == "betaH":
                beta = loss_f.beta
                wandb.config["Beta"] = beta
        evaluator = Evaluator(model,
                              loss_f,
                              device=device,
                              logger=logger,
                              save_dir=exp_dir,
                              is_progress_bar=not args.no_progress_bar,
                              use_wandb=use_wandb)

        evaluator(test_loader,
                  is_metrics=args.is_metrics,
                  is_losses=not args.no_test)
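On the doubled batch for the "factor" loss: FactorVAE uses one half of each batch for the usual reconstruction/KL terms and the other half for the total-correlation discriminator on permuted latents. get_loss_f and the loss classes are project code; the splitting step itself is just this (sizes illustrative):

import torch

batch = torch.randn(128, 1, 64, 64)          # a doubled batch from the loader
half1, half2 = torch.chunk(batch, 2, dim=0)  # 64 samples each
# half1 -> VAE reconstruction / KL terms, half2 -> discriminator on permuted latents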
Example #6
        args.n_gpu = 1

    args.device = device

    # Setup logging
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN)
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank, device, args.n_gpu, bool(args.local_rank != -1),
        args.fp16)

    # Set seed and log params
    set_seed(args)
    logger.info("Training/evaluation parameters %s", args)

    #### Get Models ####
    dis = discriminative_transformers.PretrainedDiscriminativeTransformer(args)
    dis_tokenizer = dis.tokenizer
    gen = generative_transformers.PretrainedTransformerGenerator(args)
    gen_tokenizer = gen.tokenizer
    encoder = generative_transformers.PretrainedTransformerGenerator(args)
    # let's verify that the model generates reasonable text
    sampled_text = gen.sample_text(2)
    print("Starting off, sampled text is ", sampled_text)

    # TODO: clean up the hardcoded amount
    if args.gen_model_type in ["gpt2", "ctrl"]:
        decoder = GRUDecoder(gen.config.n_embd, gen_tokenizer.vocab_size,
Example #7
"""
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow import keras
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from models.cnn_resnet import ResNet18, train_step, test_step
from utils.dataload import cifar10_dataset, cifar100_dataset, svhn_dataset
import matplotlib.pyplot as plt
from os.path import isfile
from utils.helpers import set_seed, normalize_img, init_epoch, data_load
from tqdm import tqdm

# setting the seed -- [5, 10, 15, 20, 25]
set_seed(15)

input_shape = (32, 32, 3)
n_classes = 100
batches = 128

ds_train, ds_test = data_load(batches, data_fn=cifar100_dataset)
model = ResNet18(input_shape, n_classes)
# optim = tf.keras.optimizers.SGD(lr=0.1, momentum=0.9, decay=0.04)
optim = tf.keras.optimizers.Adam(lr=0.0001, beta_1=0.5)
ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=model, optimizer=optim)
manager = tf.train.CheckpointManager(
    ckpt, directory='checkpoint/resnet18_checkpoint', max_to_keep=20)
emanager = tf.train.CheckpointManager(
    ckpt, directory='checkpoint/resnet18_echeckpoint', max_to_keep=20)
ckpt.restore(manager.latest_checkpoint)
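A brief note on the checkpoint objects just created: the usual pattern is to check manager.latest_checkpoint to decide between resuming and starting fresh, and to call manager.save() periodically during training. A minimal sketch of that loop, building on the ckpt/manager/ds_train objects defined above (the epoch count and the training body are placeholders):

if manager.latest_checkpoint:
    print('Restored from {}'.format(manager.latest_checkpoint))
else:
    print('Initializing from scratch.')

for epoch in range(2):              # illustrative epoch count
    ckpt.step.assign_add(1)         # track progress inside the checkpoint itself
    # ... run train_step / test_step over ds_train and ds_test here ...
    save_path = manager.save()      # writes checkpoint/resnet18_checkpoint/ckpt-N
    print('Saved checkpoint for step {}: {}'.format(int(ckpt.step), save_path))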
Example #8
def main(args: argparse.Namespace):
    """Main train and evaluation function."""
    formatter = logging.Formatter(
        '%(asctime)s %(levelname)s - %(funcName)s: %(message)s', "%H:%M:%S")
    logger = logging.getLogger(__name__)
    logger.setLevel("INFO")
    stream = logging.StreamHandler()
    stream.setLevel("INFO")
    stream.setFormatter(formatter)
    logger.addHandler(stream)

    set_seed(args.seed)
    device = get_device(is_gpu=not args.no_cuda)
    exp_dir = os.path.join(RES_DIR, args.name)
    logger.info(
        f"Root directory for saving and loading experiments: {exp_dir}")

    if not args.is_eval_only:

        create_safe_directory(exp_dir, logger=logger)

        if args.loss == "factor":
            logger.info(
                "FactorVae needs 2 batches per iteration. " +
                "To replicate this behavior, double batch size and epochs.")
            args.batch_size *= 2
            args.epochs *= 2

        # PREPARES DATA
        train_loader = get_dataloaders(args.dataset,
                                       noise=args.noise,
                                       batch_size=args.batch_size,
                                       logger=logger)
        logger.info(
            f"Train {args.dataset} with {len(train_loader.dataset)} samples")

        # PREPARES MODEL
        args.img_size = get_img_size(args.dataset)  # stores for metadata
        model = VAE(args.img_size, args.latent_dim)
        logger.info(f'Num parameters in model: {get_n_param(model)}')

        # TRAINS
        optimizer = optim.Adam(model.parameters(), lr=args.lr)

        model = model.to(device)
        gif_visualizer = GifTraversalsTraining(model, args.dataset, exp_dir)
        loss_f = get_loss_f(args.loss,
                            n_data=len(train_loader.dataset),
                            device=device,
                            **vars(args))

        if args.loss in ['tdGJS', 'tGJS']:
            loss_optimizer = optim.Adam(loss_f.parameters(), lr=args.lr)
        else:
            loss_optimizer = None
        logger.info(f'Loss optimizer: {loss_optimizer}')
        trainer = Trainer(model,
                          optimizer,
                          loss_f,
                          device=device,
                          logger=logger,
                          save_dir=exp_dir,
                          is_progress_bar=not args.no_progress_bar,
                          gif_visualizer=gif_visualizer,
                          loss_optimizer=loss_optimizer,
                          denoise=args.noise is not None)
        trainer(
            train_loader,
            epochs=args.epochs,
            checkpoint_every=args.checkpoint_every,
        )

        # SAVE MODEL AND EXPERIMENT INFORMATION
        save_model(trainer.model, exp_dir, metadata=vars(args))

    # Eval
    model = load_model(exp_dir, is_gpu=not args.no_cuda)
    metadata = load_metadata(exp_dir)

    test_loader = get_dataloaders(metadata["dataset"],
                                  noise=args.noise,
                                  train=False,
                                  batch_size=128,
                                  logger=logger)
    loss_f = get_loss_f(args.loss,
                        n_data=len(test_loader.dataset),
                        device=device,
                        **vars(args))
    evaluator = Evaluator(model,
                          loss_f,
                          device=device,
                          is_metrics=args.is_metrics,
                          is_train=False,
                          logger=logger,
                          save_dir=exp_dir,
                          is_progress_bar=not args.no_progress_bar,
                          denoise=args.noise is not None)
    evaluator(test_loader)

    # Evaluate on the train set as well
    test_loader = get_dataloaders(metadata["dataset"],
                                  train=True,
                                  batch_size=128,
                                  logger=logger)
    loss_f = get_loss_f(args.loss,
                        n_data=len(test_loader.dataset),
                        device=device,
                        **vars(args))
    evaluator = Evaluator(model,
                          loss_f,
                          device=device,
                          is_metrics=args.is_metrics,
                          is_train=True,
                          logger=logger,
                          save_dir=exp_dir,
                          is_progress_bar=not args.no_progress_bar)
    evaluator(test_loader)
Example #9
def main(args):
    """Main train and evaluation function.

    Parameters
    ----------
    args: argparse.Namespace
        Arguments
    """

    # Logging info
    formatter = logging.Formatter('%(asctime)s %(levelname)s - '
                                  '%(funcName)s: %(message)s',
                                  '%H:%M:%S')
    logger = logging.getLogger(__name__)
    logger.setLevel('INFO')
    stream = logging.StreamHandler()
    stream.setLevel('INFO')
    stream.setFormatter(formatter)
    logger.addHandler(stream)

    set_seed(args.seed)
    device = torch.device(
        'cuda' if torch.cuda.is_available() and args.cuda else 'cpu')
    model_name = f'{args.name}_lr{args.lr}_z{args.latent_dim}' \
                 + f'_h{args.hidden_dim}_p{args.p_dropout}'
    model_dir = os.path.join(args.results, model_name)
    logger.info(f'Directory for saving and loading models: {model_dir}')

    if not args.eval:
        # Model directory
        new_model_dir(model_dir, logger=logger)

        # Dataloaders
        train_loader, valid_loader = get_dataloaders(
            args.data, args.t_hours, args.n_bins,
            validation=True, dynamic=args.dynamic,
            batch_size=args.bs, logger=logger)
        logger.info(
            f'Train {args.model_type}-{args.t_hours} ' +
            f'with {len(train_loader.dataset)} samples')

        # Load model
        n_tokens = len(np.load(
            os.path.join(
                args.data, '_dicts', f'{args.t_hours}_{args.n_bins}.npy'),
            allow_pickle=True).item())
        model = init_model(
            args.model_type, n_tokens, args.latent_dim, args.hidden_dim,
            p_dropout=args.p_dropout, dt=args.dt,
            weighted=args.weighted, dynamic=args.dynamic)
        logger.info(f'#params in model: {get_n_param(model)}')

        # Optimizer
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        loss_f = BCE()
        model = model.to(device)

        # Training
        trainer = Trainer(
            model, loss_f, optimizer,
            device=device, logger=logger, save_dir=model_dir, p_bar=args.p_bar)
        trainer.train(
            train_loader, valid_loader,
            epochs=args.epochs, early_stopping=args.early_stopping)

        # Save model
        metadata = vars(args)
        metadata['n_tokens'] = n_tokens
        save_model(trainer.model, model_dir, metadata=metadata)

    if args.test:
        # Load model
        model = load_model(model_dir, is_gpu=args.cuda)
        metadata = load_metadata(model_dir)

        # Dataloader
        test_loader, _ = get_dataloaders(
            metadata['data'], metadata['t_hours'], metadata['n_bins'],
            validation=False, dynamic=metadata['dynamic'], batch_size=128,
            shuffle=False, logger=logger)

        # Evaluate
        loss_f = BCE()
        evaluator = Trainer(
            model, loss_f,
            device=device, logger=logger, save_dir=model_dir, p_bar=args.p_bar)
        evaluator._valid_epoch(test_loader)
Example #10
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow import keras
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from models.vgg16_cbs import VGG16CustomCbs, train_step, test_step
from utils.dataload import cifar10_dataset, cifar100_dataset, svhn_dataset
import matplotlib.pyplot as plt
from os.path import isfile
from utils.helpers import set_seed, normalize_img, init_epoch, data_load
from tqdm import tqdm
from time import time

# setting the seed -- [5, 10, 15, 20, 25]
set_seed(10)

input_shape = (32, 32, 3)
n_classes = 10
batches = 256

ds_train, ds_test = data_load(batches, data_fn=svhn_dataset)
model = VGG16CustomCbs(input_shape, n_classes)
# optim = tf.keras.optimizers.SGD(lr=0.01, decay=0.05, momentum=0.9)
optim = tf.keras.optimizers.Adam(lr=0.0001, beta_1=0.5)
ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=model, optimizer=optim)
manager = tf.train.CheckpointManager(
    ckpt, directory='checkpoint/vgg16cbs_checkpoint', max_to_keep=20)
emanager = tf.train.CheckpointManager(
    ckpt, directory='checkpoint/vgg16cbs_echeckpoint', max_to_keep=20)
ckpt.restore(manager.latest_checkpoint)
Example #11
import torch
import os
import logging

from dataloader.utils import load_dataset
from decoding.generation import generate_beam
from decoding.greedy import greedy_search
from architecture.model import CDS
from utils.helpers import get_device, calculate_rouge, set_seed
from utils.cmdopt import parse_sum_args

logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.INFO)
opt = parse_sum_args()
device = get_device()

set_seed(55)


def test(dataset, fields, model):

    already, hypothesis, references = 0, [], []

    for batch in dataset:
        if opt.tf:
            predictions = greedy_search(model, opt, batch.src, fields,
                                        opt.max_length)
        else:
            predictions, _ = generate_beam(5, model, opt, batch.src, fields)
            predictions = [p for p, _ in predictions]

        hypothesis += [fields["tgt"].decode(p) for p in predictions]
def train_generator_mle(args, train_dataset, model, tokenizer, optimizer,
                        eval_dataset):
    """ Train the model """
    if args.local_rank in [-1, 0]:
        tb_writer = SummaryWriter()

    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)

    train_sampler = RandomSampler(
        train_dataset) if args.local_rank == -1 else DistributedSampler(
            train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size)

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (
            len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(
            train_dataloader
        ) // args.gradient_accumulation_steps * args.num_train_epochs

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d",
                args.per_gpu_train_batch_size)
    logger.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d",
        args.train_batch_size * args.gradient_accumulation_steps *
        (torch.distributed.get_world_size() if args.local_rank != -1 else 1))
    logger.info("  Gradient Accumulation steps = %d",
                args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    global_step = 0
    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    train_iterator = trange(int(args.num_train_epochs),
                            desc="Epoch",
                            disable=args.local_rank not in [-1, 0])
    set_seed(
        args)  # Added here for reproducibility (even between python 2 and 3)
    for _ in train_iterator:
        epoch_iterator = tqdm(train_dataloader,
                              desc="Iteration",
                              disable=args.local_rank not in [-1, 0])
        for step, batch in enumerate(epoch_iterator):
            inputs, labels = mask_tokens(batch, tokenizer,
                                         args) if args.mlm else (batch, batch)
            inputs = inputs.to(args.device)
            labels = labels.to(args.device)
            model.train()
            outputs = model(inputs,
                            masked_lm_labels=labels) if args.mlm else model(
                                inputs, labels=labels)
            loss = outputs[
                0]  # model outputs are always tuple in pytorch-transformers (see doc)

            if args.n_gpu > 1:
                loss = loss.mean(
                )  # mean() to average on multi-gpu parallel training
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), args.max_grad_norm)
                else:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args.max_grad_norm)
                optimizer.step()
                model.zero_grad()
                global_step += 1

                if args.local_rank in [
                        -1, 0
                ] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    # Log metrics
                    if args.local_rank == -1 and args.evaluate_during_training:  # Only evaluate when single GPU otherwise metrics may not average well
                        results = evaluate(args, model, tokenizer)
                        for key, value in results.items():
                            tb_writer.add_scalar('eval_{}'.format(key), value,
                                                 global_step)
                    logging_loss = tr_loss

                if args.local_rank in [
                        -1, 0
                ] and args.save_steps > 0 and global_step % args.save_steps == 0:
                    # Save model checkpoint
                    output_dir = os.path.join(
                        args.output_dir, 'checkpoint-{}'.format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    model_to_save = model.module if hasattr(
                        model, 'module'
                    ) else model  # Take care of distributed/parallel training
                    model_to_save.model.model.save_pretrained(output_dir)
                    torch.save(args,
                               os.path.join(output_dir, 'training_args.bin'))
                    logger.info("Saving model checkpoint to %s", output_dir)

            if args.max_steps > 0 and global_step > args.max_steps:
                epoch_iterator.close()
                break
        if args.max_steps > 0 and global_step > args.max_steps:
            train_iterator.close()
            break

    if args.local_rank in [-1, 0]:
        tb_writer.close()

    return evaluate_generator_mle(args, model, tokenizer, eval_dataset)
def adversarial_train(args,
                      gen,
                      dis,
                      encoder,
                      tokenizer,
                      optimizer,
                      training_dataloader,
                      num_steps,
                      is_discriminator=True):
    set_seed(
        args)  # Added here for reproducibility (even between Python 2 and 3)
    total_loss = 0

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (
            len(training_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(
            training_dataloader
        ) // args.gradient_accumulation_steps * args.num_train_epochs

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        dis = torch.nn.DataParallel(dis)
        gen = torch.nn.DataParallel(gen)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        dis = torch.nn.parallel.DistributedDataParallel(
            dis,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)
        gen = torch.nn.parallel.DistributedDataParallel(
            gen,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)

    logger.info("***** Running adversarial training *****")
    logger.info("  Num examples = %d", len(training_dataloader))
    logger.info("  Instantaneous batch size per GPU = %d",
                args.per_gpu_train_batch_size)
    logger.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d",
        args.train_batch_size * args.gradient_accumulation_steps *
        (torch.distributed.get_world_size() if args.local_rank != -1 else 1))
    logger.info("  Gradient Accumulation steps = %d",
                args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    train_iterator = trange(int(1),
                            desc="Epoch",
                            disable=args.local_rank not in [-1, 0])
    for _ in train_iterator:
        epoch_iterator = tqdm(training_dataloader,
                              desc="Iteration",
                              disable=args.local_rank not in [-1, 0])
        for step, batch in enumerate(epoch_iterator):
            # put only the network currently being optimized into train mode
            (dis if is_discriminator else gen).train()
            # Get real embeddings
            batch = tuple(t.to(args.device) for t in batch)
            batch_inputs = create_transformer_mapping(batch, "xlnet")
            real_embedding = encoder(**batch_inputs)
            d_out_real = discriminator_eval(args, real_embedding, dis,
                                            tokenizer)
            # get fake embeddings
            gen_batch = gen.sample(
                args.train_batch_size).cuda()  # (1, batch_size, embedding_dim)
            d_out_fake = discriminator_eval(args, gen_batch, dis, tokenizer)
            # compute losses and return the relevant one
            assert d_out_real.shape == d_out_fake.shape, "shapes are not aligned, error"
            if is_discriminator:
                _, loss = get_losses(d_out_real, d_out_fake, args.loss_type)
            else:
                loss, _ = get_losses(d_out_real, d_out_fake, args.loss_type)
            epoch_iterator.set_description('loss:{:.4f}'.format(loss.item()))
            epoch_iterator.update(1)
            optimize(optimizer, loss, dis if is_discriminator else gen)
            total_loss += loss.item()

    average_loss = total_loss / (
        num_steps * args.train_batch_size) if num_steps != 0 else 0
    return average_loss, optimizer, gen, dis
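get_losses(d_out_real, d_out_fake, loss_type) is project code; from the way its return values are unpacked above, it returns a (generator_loss, discriminator_loss) pair. A hedged sketch of what a "standard" non-saturating GAN variant of such a function could look like (the function body is illustrative, not the project's implementation):

import torch
import torch.nn.functional as F

def get_losses(d_out_real, d_out_fake, loss_type='standard'):
    """Illustrative only: returns (generator_loss, discriminator_loss)."""
    if loss_type == 'standard':
        # discriminator: real logits toward 1, fake logits toward 0
        d_loss = (F.binary_cross_entropy_with_logits(d_out_real, torch.ones_like(d_out_real)) +
                  F.binary_cross_entropy_with_logits(d_out_fake, torch.zeros_like(d_out_fake)))
        # generator: push fake logits toward 1 (non-saturating loss)
        g_loss = F.binary_cross_entropy_with_logits(d_out_fake, torch.ones_like(d_out_fake))
        return g_loss, d_loss
    raise ValueError('unknown loss_type: {}'.format(loss_type))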