Exemple #1
0
    def test(self, args, nrows=2, stats_file='model_stats'):
        """Test GAN"""
        print('Init')
        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)

        # read_data returns a DataSet object with next_batch method
        normal = read_data('normal/')
        defect = read_masked('masked/')

        if self.load(self.checkpoint_dir):
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed. Continuing")

        batch_idxs = 781 // self.batch_size
        start = time.time()
        for i in xrange(0, batch_idxs):
            print('Generating samples for batch %2d, time: %4.4f' % (i, time.time() - start))
            normal_test_batch = normal.next_batch(self.batch_size, which='test/', labels=True)
            defect_test_batch = defect.next_batch(self.batch_size, which='test/', labels=True)
            file_combinations = zip(normal_test_batch[1], defect_test_batch[1])

            self._samples = self.sess.run(self.generated_sample,
                                    feed_dict={self.normal_xrays: normal_test_batch[0],
                                               self.masked_bbox: defect_test_batch[0]})
            image_filename = './{}/test_{:04d}.png'.format(args.test_dir, i)
            save_images(images=self._samples, size=[nrows, self.batch_size//nrows], image_path=image_filename)
            save_stats(filename=stats_file, image_name=image_filename, labels=file_combinations)
    def train_model(self):
        """
        """
        self.train_loss, self.train_acc = [], []
        self.valid_loss, self.valid_acc = [], []
        for epoch in range(10):
            valid = self.valid_epoch()
            print(f"Validation Accuracy: {self.valid_acc[-1]}")
            print(f"Validation Loss: {self.valid_loss[-1]}")
            train = self.train_epoch()
            print(f"Training Accuracy: {self.train_acc[-1]}")
            print(f"Training Loss: {self.train_loss[-1]}")
            save_stats(train_loss=self.train_loss, valid_loss=self.valid_loss,
                       train_acc=self.train_acc, valid_acc=self.valid_acc)
        valid = self.valid_epoch()
        print(f"Final Accuracy: {self.valid_acc[-1]}")
        print(f"Final Loss: {self.valid_loss[-1]}")

        return
Exemple #3
0
    def test(self, args, stats_file='model_stats'):
        """Test GAN"""
        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)

        # read_data returns a DataSet object with next_batch method
        normal = read_data('normal')
        defect = read_data('defect')

        if self.load(self.checkpoint_dir):
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed. Continuing")

        batch_idxs = defect.num_examples // self.batch_size
        batch_end_time = time.time()
        for i in batch_idxs:
            print('Generating samples for batch %2d, time: %4.4f' %
                  (i, batch_end_time - time.time()))

            normal_test_batch = normal.next_batch(self.batch_size)
            defect_test_batch = defect.next_batch(self.batch_size)
            file_combinations = zip(normal_test_batch[1], defect_test_batch[1])

            samples = self.sess.run(self.generated_sample,
                                    feed_dict={
                                        self.normal_xrays:
                                        normal_test_batch[0],
                                        self.input_bbox: defect_test_batch[0]
                                    })
            image_filename = './{}/test_{:04d}.png'.format(args.test_dir, i)
            save_images(images=samples,
                        size=[self.batch_size, 2],
                        image_path=image_filename)
            save_stats(filename=stats_file,
                       image_name=image_filename,
                       labels=file_combinations)
            batch_end_time = time.time()
def main(config):
    # create unique output directory for this model

    config['name'] = config['name'] + '-' + str(config['hidden_state_size'])
    if config['train_stride']:
        config['name'] = config['name'] + '-stride'
    if config['concat_labels']:
        config['name'] = config['name'] + '-concat_labels'
    if config['attention']:
        config['name'] = config['name'] + '-attention'
    if config['share_weights']:
        config['name'] = config['name'] + '-share_weights'

    config['name'] = config['name'] + '-' + config[
        'learning_rate_type'] + '-' + str(config['learning_rate'])

    timestamp = str(int(time.time()))
    config['model_dir'] = os.path.abspath(
        os.path.join(config['output_dir'], config['name'] + '-' + timestamp))
    os.makedirs(config['model_dir'])
    print('Writing checkpoints into {}'.format(config['model_dir']))

    # load the data, this requires that the *.npz files you downloaded from Kaggle be named `train.npz` and `valid.npz`
    data_train = load_data(config, 'train', config['train_stride'])
    data_valid = load_data(config, 'valid', config['eval_stride'])

    # TODO if you would like to do any preprocessing of the data, here would be a good opportunity
    stats = calculate_stats(data_train.input_)
    save_stats(stats)

    if config['normalize']:
        data_train.input_, _, _ = preprocess(data_train.input_)
        data_train.target, _, _ = preprocess(data_train.target)

        data_valid.input_, _, _ = preprocess(data_valid.input_)
        data_valid.target, _, _ = preprocess(data_valid.target)

        print('Post normalize samples shape: ', data_train.input_[0].shape)

    config['input_dim'] = data_train.input_[0].shape[-1]
    config['output_dim'] = data_train.target[0].shape[-1]

    # get input placeholders and get the model that we want to train
    seq2seq_model_class, placeholders = get_model_and_placeholders(config)

    # Create a variable that stores how many training iterations we performed.
    # This is useful for saving/storing the network
    global_step = tf.Variable(1, name='global_step', trainable=False)

    # create a training graph, this is the graph we will use to optimize the parameters
    with tf.name_scope('Training'):
        seq2seq_model = seq2seq_model_class(config,
                                            placeholders,
                                            mode='training')
        seq2seq_model.build_graph()
        print('created RNN model with {} parameters'.format(
            seq2seq_model.n_parameters))

        # configure learning rate
        if config['learning_rate_type'] == 'exponential':
            lr = tf.train.exponential_decay(
                config['learning_rate'],
                global_step=global_step,
                decay_steps=config['learning_rate_decay_steps'],
                decay_rate=config['learning_rate_decay_rate'],
                staircase=False)
            lr_decay_op = tf.identity(lr)
        elif config['learning_rate_type'] == 'linear':
            lr = tf.Variable(config['learning_rate'], trainable=False)
            lr_decay_op = lr.assign(
                tf.multiply(lr, config['learning_rate_decay_rate']))
        elif config['learning_rate_type'] == 'fixed':
            lr = config['learning_rate']
            lr_decay_op = tf.identity(lr)
        else:
            raise ValueError('learning rate type "{}" unknown.'.format(
                config['learning_rate_type']))

        with tf.name_scope('Optimizer'):
            # TODO choose the optimizer you desire here and define `train_op. The loss should be accessible through rnn_model.loss
            params = tf.trainable_variables()
            optimizer = tf.train.AdamOptimizer(config['learning_rate'])
            gradients = tf.gradients(seq2seq_model.loss, params)

            # clip the gradients to counter explosion
            clipped_gradients, _ = tf.clip_by_global_norm(
                gradients, config['gradient_clip'])

            # backprop
            train_op = optimizer.apply_gradients(zip(clipped_gradients,
                                                     params),
                                                 global_step=global_step)

    # create a graph for validation
    with tf.name_scope('Validation'):
        seq2seq_model_valid = seq2seq_model_class(config,
                                                  placeholders,
                                                  mode='validation')
        seq2seq_model_valid.build_graph()

    # Create summary ops for monitoring the training
    # Each summary op annotates a node in the computational graph and collects data data from it
    tf.summary.scalar('learning_rate', lr, collections=['training_summaries'])

    # Merge summaries used during training and reported after every step
    summaries_training = tf.summary.merge(
        tf.get_collection('training_summaries'))

    # create summary ops for monitoring the validation
    # caveat: we want to store the performance on the entire validation set, not just one validation batch
    # Tensorflow does not directly support this, so we must process every batch independently and then aggregate
    # the results outside of the model
    # so, we create a placeholder where can feed the aggregated result back into the model
    loss_valid_pl = tf.placeholder(tf.float32, name='loss_valid_pl')
    loss_valid_s = tf.summary.scalar('loss_valid',
                                     loss_valid_pl,
                                     collections=['validation_summaries'])

    # merge validation summaries
    summaries_valid = tf.summary.merge([loss_valid_s])

    # dump the config to the model directory in case we later want to see it
    export_config(config, os.path.join(config['model_dir'], 'config.txt'))

    with tf.Session() as sess:
        # Add the ops to initialize variables.
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        # Actually intialize the variables
        sess.run(init_op)

        # create file writers to dump summaries onto disk so that we can look at them with tensorboard
        train_summary_dir = os.path.join(config['model_dir'], "summary",
                                         "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir,
                                                     sess.graph)
        valid_summary_dir = os.path.join(config['model_dir'], "summary",
                                         "validation")
        valid_summary_writer = tf.summary.FileWriter(valid_summary_dir,
                                                     sess.graph)

        # create a saver for writing training checkpoints
        saver = tf.train.Saver(var_list=tf.trainable_variables(),
                               max_to_keep=config['n_keep_checkpoints'])

        # start training
        start_time = time.time()
        current_step = 0
        for e in range(config['n_epochs']):

            # reshuffle the batches
            data_train.reshuffle()

            # loop through all training batches
            for i, batch in enumerate(data_train.all_batches()):
                step = tf.train.global_step(sess, global_step)
                current_step += 1

                if config[
                        'learning_rate_type'] == 'linear' and current_step % config[
                            'learning_rate_decay_steps'] == 0:
                    sess.run(lr_decay_op)

                # we want to train, so must request at least the train_op
                fetches = {
                    'summaries': summaries_training,
                    'loss': seq2seq_model.loss,
                    'train_op': train_op
                }

                # get the feed dict for the current batch
                feed_dict = seq2seq_model.get_feed_dict(batch)

                # feed data into the model and run optimization
                training_out = sess.run(fetches, feed_dict)

                # write logs
                train_summary_writer.add_summary(training_out['summaries'],
                                                 global_step=step)

                # print training performance of this batch onto console
                time_delta = str(
                    datetime.timedelta(seconds=int(time.time() - start_time)))
                print('\rEpoch: {:3d} [{:4d}/{:4d}] time: {:>8} loss: {:.4f}'.
                      format(e + 1, i + 1, data_train.n_batches, time_delta,
                             training_out['loss']),
                      end='')

            # after every epoch evaluate the performance on the validation set
            total_valid_loss = 0.0
            n_valid_samples = 0
            for batch in data_valid.all_batches():
                fetches = {'loss': seq2seq_model_valid.loss}
                feed_dict = seq2seq_model_valid.get_feed_dict(batch)
                valid_out = sess.run(fetches, feed_dict)

                total_valid_loss += valid_out['loss'] * batch.batch_size
                n_valid_samples += batch.batch_size

            # write validation logs
            avg_valid_loss = total_valid_loss / n_valid_samples
            valid_summaries = sess.run(summaries_valid,
                                       {loss_valid_pl: avg_valid_loss})
            valid_summary_writer.add_summary(valid_summaries,
                                             global_step=tf.train.global_step(
                                                 sess, global_step))

            # print validation performance onto console
            print(' | validation loss: {:.6f}'.format(avg_valid_loss))

            # save this checkpoint if necessary
            if (e + 1) % config['save_checkpoints_every_epoch'] == 0:
                saver.save(sess, os.path.join(config['model_dir'], 'model'),
                           global_step)

            if avg_valid_loss > 10 or math.isnan(avg_valid_loss) or np.isinf(
                    avg_valid_loss):
                break

        # Training finished, always save model before exiting
        print('Training finished')
        ckpt_path = saver.save(sess, os.path.join(config['model_dir'],
                                                  'model'), global_step)
        print('Model saved to file {}'.format(ckpt_path))
Exemple #5
0
            except FileExistsError:
                pass
            with torch.no_grad():
                for i in range(4):
                    h_views, v_views, i_views, d_views, center, gt, mask, index = test_set.get_scene(i)
                    data_h = torch.tensor(h_views, device=device).float()
                    data_v = torch.tensor(v_views, device=device).float()
                    data_d = torch.tensor(d_views, device=device).float()
                    predicted_h, _, _ = model(data_h)
                    predicted_h = predicted_h.view(9, 3, 128, 128)
                    predicted_v, _, _ = model(data_v)
                    predicted_v = predicted_v.view(9, 3, 128, 128)
                    mu_h, _ = model.encode(data_v)
                    mu_v, _ = model.encode(data_h)
                    predicted_d = model.decode(mu_h + mu_v).view(9, 3, 128, 128)
                    test_loss += F.l1_loss(predicted_d, data_d)
                    if i == 3:
                        print("Save predicition:")
                        show_view_sequence(predicted_h.cpu(), "horizontal", savepath=results_path)
                        show_view_sequence(predicted_v.cpu(), "vertical", savepath=results_path)
                        show_view_sequence(predicted_d.cpu(), "diagonal", savepath=results_path)
                        show_view_sequence(h_views, "h_truth", savepath=results_path)
                        show_view_sequence(v_views, "v_truth", savepath=results_path)
                        show_view_sequence(d_views, "d_truth", savepath=results_path)
                        show_view_sequence((d_views - predicted_d.cpu().numpy()), "difference", savepath=results_path, cmap="coolwarm")

                test_loss /= 4
                print('====> Test set loss: {:.4f}'.format(test_loss))
                save_stats("training_stats.csv", MODEL_NAME, "MSE", EPOCHS*4, "{:.4f}".format(test_loss))

Exemple #6
0
def main(args):
    if args.model_name is not None:
        print('Preparing to train model: {}'.format(args.model_name))

    global device
    device = torch.device(
        'cuda' if torch.cuda.is_available() and not args.cpu else 'cpu')

    sc_will_happen = args.self_critical_from_epoch != -1

    if args.validate is None and args.lr_scheduler == 'ReduceLROnPlateau':
        print(
            'ERROR: you need to enable validation in order to use default lr_scheduler (ReduceLROnPlateau)'
        )
        print('Hint: use something like --validate=coco:val2017')
        sys.exit(1)

    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing, normalization for the pretrained resnet
    transform = transforms.Compose([
        # transforms.Resize((256, 256)),
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    scorers = {}
    if args.validation_scoring is not None or sc_will_happen:
        assert not (
            args.validation_scoring is None and sc_will_happen
        ), "Please provide a metric when using self-critical training"
        for s in args.validation_scoring.split(','):
            s = s.lower().strip()
            if s == 'cider':
                from eval.cider import Cider
                scorers['CIDEr'] = Cider()
            if s == 'ciderd':
                from eval.ciderD.ciderD import CiderD
                scorers['CIDEr-D'] = CiderD(df=args.cached_words)

    ########################
    # Set Model parameters #
    ########################

    # Store parameters gotten from arguments separately:
    arg_params = ModelParams.fromargs(args)

    print("Model parameters inferred from command arguments: ")
    print(arg_params)
    start_epoch = 0

    ###############################
    # Load existing model state   #
    # and update Model parameters #
    ###############################

    state = None

    if args.load_model:
        try:
            state = torch.load(args.load_model, map_location=device)
        except AttributeError:
            print(
                'WARNING: Old model found. Please use model_update.py in the model before executing this script.'
            )
            exit(1)
        new_external_features = arg_params.features.external

        params = ModelParams(state, arg_params=arg_params)
        if len(new_external_features
               ) and params.features.external != new_external_features:
            print('WARNING: external features changed: ',
                  params.features.external, new_external_features)
            print('Updating feature paths...')
            params.update_ext_features(new_external_features)
        start_epoch = state['epoch']
        print('Loaded model {} at epoch {}'.format(args.load_model,
                                                   start_epoch))
    else:
        params = arg_params
        params.command_history = []

    if params.rnn_hidden_init == 'from_features' and params.skip_start_token:
        print(
            "ERROR: Please remove --skip_start_token if you want to use image features "
            " to initialize hidden and cell states. <start> token is needed to trigger "
            " the process of sequence generation, since we don't have image features "
            " embedding as the first input token.")
        sys.exit(1)

    # Force set the following hierarchical model parameters every time:
    if arg_params.hierarchical_model:
        params.hierarchical_model = True
        params.max_sentences = arg_params.max_sentences
        params.weight_sentence_loss = arg_params.weight_sentence_loss
        params.weight_word_loss = arg_params.weight_word_loss
        params.dropout_stopping = arg_params.dropout_stopping
        params.dropout_fc = arg_params.dropout_fc
        params.coherent_sentences = arg_params.coherent_sentences
        params.coupling_alpha = arg_params.coupling_alpha
        params.coupling_beta = arg_params.coupling_beta

    assert args.replace or \
        not os.path.isdir(os.path.join(args.output_root, args.model_path, get_model_name(args, params))) or \
        not (args.load_model and not args.validate_only), \
        '{} already exists. If you want to replace it or resume training please use --replace flag. ' \
        'If you want to validate a loaded model without training it, use --validate_only flag.'  \
        'Otherwise specify a different model name using --model_name flag.'\
        .format(os.path.join(args.output_root, args.model_path, get_model_name(args, params)))

    if args.load_model:
        print("Final model parameters (loaded model + command arguments): ")
        print(params)

    ##############################
    # Load dataset configuration #
    ##############################

    dataset_configs = DatasetParams(args.dataset_config_file)

    if args.dataset is None and not args.validate_only:
        print('ERROR: No dataset selected!')
        print(
            'Please supply a training dataset with the argument --dataset DATASET'
        )
        print('The following datasets are configured in {}:'.format(
            args.dataset_config_file))
        for ds, _ in dataset_configs.config.items():
            if ds not in ('DEFAULT', 'generic'):
                print(' ', ds)
        sys.exit(1)

    if args.validate_only:
        if args.load_model is None:
            print(
                'ERROR: for --validate_only you need to specify a model to evaluate using --load_model MODEL'
            )
            sys.exit(1)
    else:
        dataset_params = dataset_configs.get_params(args.dataset)

        for i in dataset_params:
            i.config_dict['no_tokenize'] = args.no_tokenize
            i.config_dict['show_tokens'] = args.show_tokens
            i.config_dict['skip_start_token'] = params.skip_start_token

            if params.hierarchical_model:
                i.config_dict['hierarchical_model'] = True
                i.config_dict['max_sentences'] = params.max_sentences
                i.config_dict['crop_regions'] = False

    if args.validate is not None:
        validation_dataset_params = dataset_configs.get_params(args.validate)
        for i in validation_dataset_params:
            i.config_dict['no_tokenize'] = args.no_tokenize
            i.config_dict['show_tokens'] = args.show_tokens
            i.config_dict['skip_start_token'] = params.skip_start_token

            if params.hierarchical_model:
                i.config_dict['hierarchical_model'] = True
                i.config_dict['max_sentences'] = params.max_sentences
                i.config_dict['crop_regions'] = False

    #######################
    # Load the vocabulary #
    #######################

    # For pre-trained models attempt to obtain
    # saved vocabulary from the model itself:
    if args.load_model and params.vocab is not None:
        print("Loading vocabulary from the model file:")
        vocab = params.vocab
    else:
        if args.vocab is None:
            print(
                "ERROR: You must specify the vocabulary to be used for training using "
                "--vocab flag.\nTry --vocab AUTO if you want the vocabulary to be "
                "either generated from the training dataset or loaded from cache."
            )
            sys.exit(1)
        print("Loading / generating vocabulary:")
        vocab = get_vocab(args, dataset_params)

    print('Size of the vocabulary is {}'.format(len(vocab)))

    ##########################
    # Initialize data loader #
    ##########################

    ext_feature_sets = [
        params.features.external, params.persist_features.external
    ]
    if not args.validate_only:
        print('Loading dataset: {} with {} workers'.format(
            args.dataset, args.num_workers))
        if params.skip_start_token:
            print("Skipping the use of <start> token...")
        data_loader, ef_dims = get_loader(
            dataset_params,
            vocab,
            transform,
            args.batch_size,
            shuffle=True,
            num_workers=args.num_workers,
            ext_feature_sets=ext_feature_sets,
            skip_images=not params.has_internal_features(),
            verbose=args.verbose,
            unique_ids=sc_will_happen)
        if sc_will_happen:
            gts_sc = get_ground_truth_captions(data_loader.dataset)

    gts_sc_valid = None
    if args.validate is not None:
        valid_loader, ef_dims = get_loader(
            validation_dataset_params,
            vocab,
            transform,
            args.batch_size,
            shuffle=True,
            num_workers=args.num_workers,
            ext_feature_sets=ext_feature_sets,
            skip_images=not params.has_internal_features(),
            verbose=args.verbose)
        gts_sc_valid = get_ground_truth_captions(
            valid_loader.dataset) if sc_will_happen else None

    #########################################
    # Setup (optional) TensorBoardX logging #
    #########################################

    writer = None
    if args.tensorboard:
        if SummaryWriter is not None:
            model_name = get_model_name(args, params)
            timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
            log_dir = os.path.join(
                args.output_root, 'log_tb/{}_{}'.format(model_name, timestamp))
            writer = SummaryWriter(log_dir=log_dir)
            print("INFO: Logging TensorBoardX events to {}".format(log_dir))
        else:
            print(
                "WARNING: SummaryWriter object not available. "
                "Hint: Please install TensorBoardX using pip install tensorboardx"
            )

    ######################
    # Build the model(s) #
    ######################

    # Set per parameter learning rate here, if supplied by the user:

    if args.lr_word_decoder is not None:
        if not params.hierarchical_model:
            print(
                "ERROR: Setting word decoder learning rate currently supported in Hierarchical Model only."
            )
            sys.exit(1)

        lr_dict = {'word_decoder': args.lr_word_decoder}
    else:
        lr_dict = {}

    model = EncoderDecoder(params,
                           device,
                           len(vocab),
                           state,
                           ef_dims,
                           lr_dict=lr_dict)

    ######################
    # Optimizer and loss #
    ######################

    sc_activated = False
    opt_params = model.get_opt_params()

    # Loss and optimizer
    if params.hierarchical_model:
        criterion = HierarchicalXEntropyLoss(
            weight_sentence_loss=params.weight_sentence_loss,
            weight_word_loss=params.weight_word_loss)
    elif args.share_embedding_weights:
        criterion = SharedEmbeddingXentropyLoss(param_lambda=0.15)
    else:
        criterion = nn.CrossEntropyLoss()

    if sc_will_happen:  # save it for later
        if args.self_critical_loss == 'sc':
            from model.loss import SelfCriticalLoss
            rl_criterion = SelfCriticalLoss()
        elif args.self_critical_loss == 'sc_with_diversity':
            from model.loss import SelfCriticalWithDiversityLoss
            rl_criterion = SelfCriticalWithDiversityLoss()
        elif args.self_critical_loss == 'sc_with_relative_diversity':
            from model.loss import SelfCriticalWithRelativeDiversityLoss
            rl_criterion = SelfCriticalWithRelativeDiversityLoss()
        elif args.self_critical_loss == 'sc_with_bleu_diversity':
            from model.loss import SelfCriticalWithBLEUDiversityLoss
            rl_criterion = SelfCriticalWithBLEUDiversityLoss()
        elif args.self_critical_loss == 'sc_with_repetition':
            from model.loss import SelfCriticalWithRepetitionLoss
            rl_criterion = SelfCriticalWithRepetitionLoss()
        elif args.self_critical_loss == 'mixed':
            from model.loss import MixedLoss
            rl_criterion = MixedLoss()
        elif args.self_critical_loss == 'mixed_with_face':
            from model.loss import MixedWithFACELoss
            rl_criterion = MixedWithFACELoss(vocab_size=len(vocab))
        elif args.self_critical_loss in [
                'sc_with_penalty', 'sc_with_penalty_throughout',
                'sc_masked_tokens'
        ]:
            raise ValueError('Deprecated loss, use \'sc\' loss')
        else:
            raise ValueError('Invalid self-critical loss')

        print('Selected self-critical loss is', rl_criterion)

        if start_epoch >= args.self_critical_from_epoch:
            criterion = rl_criterion
            sc_activated = True
            print('Self-critical loss training begins')

    # When using CyclicalLR, default learning rate should be always 1.0
    if args.lr_scheduler == 'CyclicalLR':
        default_lr = 1.
    else:
        default_lr = 0.001

    if sc_activated:
        optimizer = torch.optim.Adam(
            opt_params,
            lr=args.learning_rate if args.learning_rate else 5e-5,
            weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(opt_params,
                                     lr=default_lr,
                                     weight_decay=args.weight_decay)
    elif args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(opt_params,
                                        lr=default_lr,
                                        weight_decay=args.weight_decay)
    elif args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(opt_params,
                                    lr=default_lr,
                                    weight_decay=args.weight_decay)
    else:
        print('ERROR: unknown optimizer:', args.optimizer)
        sys.exit(1)

    # We don't want to initialize the optimizer if we are transfering
    # the language model from the regular model to hierarchical model
    transfer_language_model = False

    if arg_params.hierarchical_model and state and not state.get(
            'hierarchical_model'):
        transfer_language_model = True

    # Set optimizer state to the one found in a loaded model, unless
    # we are doing a transfer learning step from flat to hierarchical model,
    # or we are using self-critical loss,
    # or the number of unique parameter groups has changed, or the user
    # has explicitly told us *not to* reuse optimizer parameters from before
    if state and not transfer_language_model and not sc_activated and not args.optimizer_reset:
        # Check that number of parameter groups is the same
        if len(optimizer.param_groups) == len(
                state['optimizer']['param_groups']):
            optimizer.load_state_dict(state['optimizer'])

    # override lr if set explicitly in arguments -
    # 1) Global learning rate:
    if args.learning_rate:
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.learning_rate
        params.learning_rate = args.learning_rate
    else:
        params.learning_rate = default_lr

    # 2) Parameter-group specific learning rate:
    if args.lr_word_decoder is not None:
        # We want to give user an option to set learning rate for word_decoder
        # separately. Other exceptions can be added as needed:
        for param_group in optimizer.param_groups:
            if param_group.get('name') == 'word_decoder':
                param_group['lr'] = args.lr_word_decoder
                break

    if args.validate is not None and args.lr_scheduler == 'ReduceLROnPlateau':
        print('Using ReduceLROnPlateau learning rate scheduler')
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               'min',
                                                               verbose=True,
                                                               patience=2)
    elif args.lr_scheduler == 'StepLR':
        print('Using StepLR learning rate scheduler with step_size {}'.format(
            args.lr_step_size))
        # Decrease the learning rate by the factor of gamma at every
        # step_size epochs (for example every 5 or 10 epochs):
        step_size = args.lr_step_size
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                    step_size,
                                                    gamma=0.5,
                                                    last_epoch=-1)
    elif args.lr_scheduler == 'CyclicalLR':
        print(
            "Using Cyclical learning rate scheduler, lr range: [{},{}]".format(
                args.lr_cyclical_min, args.lr_cyclical_max))

        step_size = len(data_loader)
        clr = cyclical_lr(step_size,
                          min_lr=args.lr_cyclical_min,
                          max_lr=args.lr_cyclical_max)
        n_groups = len(optimizer.param_groups)
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                      [clr] * n_groups)
    elif args.lr_scheduler is not None:
        print('ERROR: Invalid learing rate scheduler specified: {}'.format(
            args.lr_scheduler))
        sys.exit(1)

    ###################
    # Train the model #
    ###################

    stats_postfix = None
    if args.validate_only:
        stats_postfix = args.validate
    if args.load_model:
        all_stats = init_stats(args, params, postfix=stats_postfix)
    else:
        all_stats = {}

    if args.force_epoch:
        start_epoch = args.force_epoch - 1

    if not args.validate_only:
        total_step = len(data_loader)
        print(
            'Start training with start_epoch={:d} num_epochs={:d} num_batches={:d} ...'
            .format(start_epoch, args.num_epochs, args.num_batches))

    if args.teacher_forcing != 'always':
        print('\t k: {}'.format(args.teacher_forcing_k))
        print('\t beta: {}'.format(args.teacher_forcing_beta))
    print('Optimizer:', optimizer)

    if args.validate_only:
        stats = {}
        teacher_p = 1.0
        if args.teacher_forcing != 'always':
            print(
                'WARNING: teacher_forcing!=always, not yet implemented for --validate_only mode'
            )

        epoch = start_epoch - 1
        if str(epoch +
               1) in all_stats.keys() and args.skip_existing_validations:
            print('WARNING: epoch {} already validated, skipping...'.format(
                epoch + 1))
            return

        val_loss = do_validate(model, valid_loader, criterion, scorers, vocab,
                               teacher_p, args, params, stats, epoch,
                               sc_activated, gts_sc_valid)
        all_stats[str(epoch + 1)] = stats
        save_stats(args, params, all_stats, postfix=stats_postfix)
    else:
        for epoch in range(start_epoch, args.num_epochs):
            stats = {}
            begin = datetime.now()

            total_loss = 0

            if params.hierarchical_model:
                total_loss_sent = 0
                total_loss_word = 0

            num_batches = 0
            vocab_counts = {
                'cnt': 0,
                'max': 0,
                'min': 9999,
                'sum': 0,
                'unk_cnt': 0,
                'unk_sum': 0
            }

            # If start self critical training
            if not sc_activated and sc_will_happen and epoch >= args.self_critical_from_epoch:
                if all_stats:
                    best_ep, best_cider = max(
                        [(ep, all_stats[ep]['validation_cider'])
                         for ep in all_stats],
                        key=lambda x: x[1])
                    print('Loading model from epoch', best_ep,
                          'which has the better score with', best_cider)
                    state = torch.load(
                        get_model_path(args, params, int(best_ep)))
                    model = EncoderDecoder(params,
                                           device,
                                           len(vocab),
                                           state,
                                           ef_dims,
                                           lr_dict=lr_dict)
                    opt_params = model.get_opt_params()

                optimizer = torch.optim.Adam(opt_params,
                                             lr=5e-5,
                                             weight_decay=args.weight_decay)
                criterion = rl_criterion
                print('Self-critical loss training begins')
                sc_activated = True

            for i, data in enumerate(data_loader):

                if params.hierarchical_model:
                    (images, captions, lengths, image_ids, features,
                     sorting_order, last_sentence_indicator) = data
                    sorting_order = sorting_order.to(device)
                else:
                    (images, captions, lengths, image_ids, features) = data

                if epoch == 0:
                    unk = vocab('<unk>')
                    for j in range(captions.shape[0]):
                        # Flatten the caption in case it's a paragraph
                        # this is harmless for regular captions too:
                        xl = captions[j, :].view(-1)
                        xw = xl > unk
                        xu = xl == unk
                        xwi = sum(xw).item()
                        xui = sum(xu).item()
                        vocab_counts['cnt'] += 1
                        vocab_counts['sum'] += xwi
                        vocab_counts['max'] = max(vocab_counts['max'], xwi)
                        vocab_counts['min'] = min(vocab_counts['min'], xwi)
                        vocab_counts['unk_cnt'] += xui > 0
                        vocab_counts['unk_sum'] += xui
                # Set mini-batch dataset
                images = images.to(device)
                captions = captions.to(device)

                # Remove <start> token from targets if we are initializing the RNN
                # hidden state from image features:
                if params.rnn_hidden_init == 'from_features' and not params.hierarchical_model:
                    # Subtract one from all lengths to match new target lengths:
                    lengths = [x - 1 if x > 0 else x for x in lengths]
                    targets = pack_padded_sequence(captions[:, 1:],
                                                   lengths,
                                                   batch_first=True)[0]
                else:
                    if params.hierarchical_model:
                        targets = prepare_hierarchical_targets(
                            last_sentence_indicator, args.max_sentences,
                            lengths, captions, device)
                    else:
                        targets = pack_padded_sequence(captions,
                                                       lengths,
                                                       batch_first=True)[0]
                        sorting_order = None

                init_features = features[0].to(device) if len(
                    features) > 0 and features[0] is not None else None
                persist_features = features[1].to(device) if len(
                    features) > 1 and features[1] is not None else None

                # Forward, backward and optimize
                # Calculate the probability whether to use teacher forcing or not:

                # Iterate over batches:
                iteration = (epoch - start_epoch) * len(data_loader) + i

                teacher_p = get_teacher_prob(args.teacher_forcing_k, iteration,
                                             args.teacher_forcing_beta)

                # Allow model to log values at the last batch of the epoch
                writer_data = None
                if writer and (i == len(data_loader) - 1
                               or i == args.num_batches - 1):
                    writer_data = {'writer': writer, 'epoch': epoch + 1}

                sample_len = captions.size(1) if args.self_critical_loss in [
                    'mixed', 'mixed_with_face'
                ] else 20
                if sc_activated:
                    sampled_seq, sampled_log_probs, outputs = model.sample(
                        images,
                        init_features,
                        persist_features,
                        max_seq_length=sample_len,
                        start_token_id=vocab('<start>'),
                        trigram_penalty_alpha=args.trigram_penalty_alpha,
                        stochastic_sampling=True,
                        output_logprobs=True,
                        output_outputs=True)
                    sampled_seq = model.decoder.alt_prob_to_tensor(
                        sampled_seq, device=device)
                else:
                    outputs = model(images,
                                    init_features,
                                    captions,
                                    lengths,
                                    persist_features,
                                    teacher_p,
                                    args.teacher_forcing,
                                    sorting_order,
                                    writer_data=writer_data)

                if args.share_embedding_weights:
                    # Weights of (HxH) projection matrix used for regularizing
                    # models that share embedding weights
                    projection = model.decoder.projection.weight
                    loss = criterion(projection, outputs, targets)
                elif sc_activated:
                    # get greedy decoding baseline
                    model.eval()
                    with torch.no_grad():
                        greedy_sampled_seq = model.sample(
                            images,
                            init_features,
                            persist_features,
                            max_seq_length=sample_len,
                            start_token_id=vocab('<start>'),
                            trigram_penalty_alpha=args.trigram_penalty_alpha,
                            stochastic_sampling=False)
                        greedy_sampled_seq = model.decoder.alt_prob_to_tensor(
                            greedy_sampled_seq, device=device)
                    model.train()

                    if args.self_critical_loss in [
                            'sc', 'sc_with_diversity',
                            'sc_with_relative_diversity',
                            'sc_with_bleu_diversity', 'sc_with_repetition'
                    ]:
                        loss, advantage = criterion(
                            sampled_seq,
                            sampled_log_probs,
                            greedy_sampled_seq, [gts_sc[i] for i in image_ids],
                            scorers,
                            vocab,
                            return_advantage=True)
                    elif args.self_critical_loss in ['mixed']:
                        loss, advantage = criterion(
                            sampled_seq,
                            sampled_log_probs,
                            outputs,
                            greedy_sampled_seq, [gts_sc[i] for i in image_ids],
                            scorers,
                            vocab,
                            targets,
                            lengths,
                            gamma_ml_rl=args.gamma_ml_rl,
                            return_advantage=True)
                    elif args.self_critical_loss in ['mixed_with_face']:
                        loss, advantage = criterion(
                            sampled_seq,
                            sampled_log_probs,
                            outputs,
                            greedy_sampled_seq, [gts_sc[i] for i in image_ids],
                            scorers,
                            vocab,
                            captions,
                            targets,
                            lengths,
                            gamma_ml_rl=args.gamma_ml_rl,
                            return_advantage=True)
                    else:
                        raise ValueError('Invalid self-critical loss')

                    if writer is not None and i % 100 == 0:
                        writer.add_scalar('training_loss', loss.item(),
                                          epoch * len(data_loader) + i)
                        writer.add_scalar('advantage', advantage,
                                          epoch * len(data_loader) + i)
                        writer.add_scalar('lr',
                                          optimizer.param_groups[0]['lr'],
                                          epoch * len(data_loader) + i)
                else:
                    loss = criterion(outputs, targets)

                model.zero_grad()
                loss.backward()

                # Clip gradients if desired:
                if args.grad_clip is not None:
                    # grad_norms = [x.grad.data.norm(2) for x in opt_params]
                    # batch_max_grad = np.max(grad_norms)
                    # if batch_max_grad > 10.0:
                    #     print('WARNING: gradient norms larger than 10.0')

                    # torch.nn.utils.clip_grad_norm_(decoder.parameters(), 0.1)
                    # torch.nn.utils.clip_grad_norm_(encoder.parameters(), 0.1)
                    clip_gradients(optimizer, args.grad_clip)

                # Update weights:
                optimizer.step()

                # CyclicalLR requires us to update LR at every minibatch:
                if args.lr_scheduler == 'CyclicalLR':
                    scheduler.step()

                total_loss += loss.item()

                num_batches += 1

                if params.hierarchical_model:
                    _, loss_sent, _, loss_word = criterion.item_terms()
                    total_loss_sent += float(loss_sent)
                    total_loss_word += float(loss_word)

                # Print log info
                if (i + 1) % args.log_step == 0:
                    print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, '
                          'Perplexity: {:5.4f}'.format(epoch + 1,
                                                       args.num_epochs, i + 1,
                                                       total_step, loss.item(),
                                                       np.exp(loss.item())))
                    sys.stdout.flush()

                    if params.hierarchical_model:
                        weight_sent, loss_sent, weight_word, loss_word = criterion.item_terms(
                        )
                        print('Sentence Loss: {:.4f}, '
                              'Word Loss: {:.4f}'.format(
                                  float(loss_sent), float(loss_word)))
                        sys.stdout.flush()

                if i + 1 == args.num_batches:
                    break

            end = datetime.now()

            stats['training_loss'] = total_loss / num_batches

            if params.hierarchical_model:
                stats['loss_sentence'] = total_loss_sent / num_batches
                stats['loss_word'] = total_loss_word / num_batches

            print('Epoch {} duration: {}, average loss: {:.4f}'.format(
                epoch + 1, end - begin, stats['training_loss']))

            save_model(args, params, model.encoder, model.decoder, optimizer,
                       epoch, vocab)

            if epoch == 0:
                vocab_counts['avg'] = vocab_counts['sum'] / vocab_counts['cnt']
                vocab_counts['unk_cnt_per'] = 100 * vocab_counts[
                    'unk_cnt'] / vocab_counts['cnt']
                vocab_counts['unk_sum_per'] = 100 * vocab_counts[
                    'unk_sum'] / vocab_counts['sum']
                # print(vocab_counts)
                print((
                    'Training data contains {sum} words in {cnt} captions (avg. {avg:.1f} w/c)'
                    + ' with {unk_sum} <unk>s ({unk_sum_per:.1f}%)' +
                    ' in {unk_cnt} ({unk_cnt_per:.1f}%) captions').format(
                        **vocab_counts))

            ############################################
            # Validation loss and learning rate update #
            ############################################

            if args.validate is not None and (epoch +
                                              1) % args.validation_step == 0:
                val_loss = do_validate(model, valid_loader, criterion, scorers,
                                       vocab, teacher_p, args, params, stats,
                                       epoch, sc_activated, gts_sc_valid)

                if args.lr_scheduler == 'ReduceLROnPlateau':
                    scheduler.step(val_loss)
            elif args.lr_scheduler == 'StepLR':
                scheduler.step()

            all_stats[str(epoch + 1)] = stats
            save_stats(args, params, all_stats, writer=writer)

            if writer is not None:
                # Log model data to tensorboard
                log_model_data(params, model, epoch + 1, writer)

    if writer is not None:
        writer.close()
Exemple #7
0
def train(seed=0,
          dataset='grid',
          samplers=(UniformDatasetSampler, UniformLatentSampler),
          latent_dim=2,
          model_dim=256,
          device='cuda',
          conditional=False,
          learning_rate=2e-4,
          betas=(0.5, 0.9),
          batch_size=256,
          iterations=400,
          n_critic=5,
          objective='gan',
          gp_lambda=10,
          output_dir='results',
          plot=False,
          spec_norm=True):

    experiment_name = [
        seed, dataset, samplers[0].__name__, samplers[1].__name__, latent_dim,
        model_dim, device, conditional, learning_rate, betas[0], betas[1],
        batch_size, iterations, n_critic, objective, gp_lambda, plot, spec_norm
    ]
    experiment_name = '_'.join([str(p) for p in experiment_name])
    results_dir = os.path.join(output_dir, experiment_name)
    network_dir = os.path.join(results_dir, 'networks')
    eval_log = os.path.join(results_dir, 'eval.log')

    os.makedirs(results_dir, exist_ok=True)
    os.makedirs(network_dir, exist_ok=True)

    eval_file = open(eval_log, 'w')

    if plot:
        samples_dir = os.path.join(results_dir, 'samples')
        os.makedirs(samples_dir, exist_ok=True)

    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    data, labels = load_data(dataset)
    data_dim, num_classes = data.shape[1], len(set(labels))

    data_sampler = samplers[0](
        torch.tensor(data).float(),
        torch.tensor(labels).long()) if conditional else samplers[0](
            torch.tensor(data).float())
    noise_sampler = samplers[1](
        latent_dim, labels) if conditional else samplers[1](latent_dim)

    if conditional:
        test_data, test_labels = load_data(dataset, split='test')
        test_dataset = TensorDataset(
            torch.tensor(test_data).to(device).float(),
            torch.tensor(test_labels).to(device).long())
        test_dataloader = DataLoader(test_dataset, batch_size=4096)

        G = Generator(latent_dim + num_classes, model_dim,
                      data_dim).to(device).train().train()
        D = Discriminator(model_dim,
                          data_dim + num_classes,
                          spec_norm=spec_norm).to(device).train()

        C_real = Classifier(model_dim, data_dim,
                            num_classes).to(device).train()
        C_fake = Classifier(model_dim, data_dim,
                            num_classes).to(device).train()
        C_fake.load_state_dict(deepcopy(C_real.state_dict()))

        C_real_optimizer = optim.Adam(C_real.parameters(),
                                      lr=2 * learning_rate)
        C_fake_optimizer = optim.Adam(C_fake.parameters(),
                                      lr=2 * learning_rate)
        C_crit = nn.CrossEntropyLoss()
    else:
        G = Generator(latent_dim, model_dim, data_dim).to(device).train()
        D = Discriminator(model_dim, data_dim,
                          spec_norm=spec_norm).to(device).train()

    D_optimizer = optim.Adam(D.parameters(), lr=learning_rate, betas=betas)
    G_optimizer = optim.Adam(G.parameters(), lr=learning_rate, betas=betas)

    if objective == 'gan':
        fake_target = torch.zeros(batch_size, 1).to(device)
        real_target = torch.ones(batch_size, 1).to(device)
    elif objective == 'wgan':
        grad_target = torch.ones(batch_size, 1).to(device)
    elif objective == 'hinge':
        bound = torch.zeros(batch_size, 1).to(device)
        sub = torch.ones(batch_size, 1).to(device)

    stats = {'D': [], 'G': [], 'C_it': [], 'C_real': [], 'C_fake': []}
    if plot:
        fixed_latent_batch = noise_sampler.get_batch(20000)
        sample_figure = plt.figure(num=0, figsize=(5, 5))
        loss_figure = plt.figure(num=1, figsize=(10, 5))
        if conditional:
            accuracy_figure = plt.figure(num=2, figsize=(10, 5))

    for it in range(iterations + 1):
        # Train Discriminator
        data_batch = data_sampler.get_batch(batch_size)
        latent_batch = noise_sampler.get_batch(batch_size)

        if conditional:
            x_real, y_real = data_batch[0].to(device), data_batch[1].to(device)
            real_sample = torch.cat([x_real, y_real], dim=1)

            z_fake, y_fake = latent_batch[0].to(device), latent_batch[1].to(
                device)
            x_fake = G(torch.cat([z_fake, y_fake], dim=1)).detach()
            fake_sample = torch.cat([x_fake, y_fake], dim=1)

        else:
            x_real = data_batch.to(device)
            real_sample = x_real

            z_fake = latent_batch.to(device)
            x_fake = G(z_fake).detach()
            fake_sample = x_fake

        D.zero_grad()
        real_pred = D(real_sample)
        fake_pred = D(fake_sample)

        if is_recorded(data_sampler):
            data_sampler.record(real_pred.detach().cpu().numpy())

        if is_weighted(data_sampler):
            weights = torch.tensor(
                data_sampler.get_weights()).to(device).float().view(
                    real_pred.shape)
        else:
            weights = torch.ones_like(real_pred).to(device)

        if objective == 'gan':
            D_loss = F.binary_cross_entropy(fake_pred, fake_target).mean() + (
                weights *
                F.binary_cross_entropy(real_pred, real_target)).mean()
            stats['D'].append(D_loss.item())

        elif objective == 'wgan':
            alpha = torch.rand(batch_size,
                               1).expand(real_sample.size()).to(device)
            interpolate = (alpha * real_sample +
                           (1 - alpha) * fake_sample).requires_grad_(True)
            gradients = torch.autograd.grad(outputs=D(interpolate),
                                            inputs=interpolate,
                                            grad_outputs=grad_target,
                                            create_graph=True,
                                            retain_graph=True,
                                            only_inputs=True)[0]

            gradient_penalty = (gradients.norm(2, dim=1) -
                                1).pow(2).mean() * gp_lambda

            D_loss = fake_pred.mean() - (real_pred * weights).mean()
            stats['D'].append(-D_loss.item())
            D_loss += gradient_penalty

        elif objective == 'hinge':
            D_loss = -(torch.min(real_pred - sub, bound) *
                       weights).mean() - torch.min(-fake_pred - sub,
                                                   bound).mean()
            stats['D'].append(D_loss.item())

        D_loss.backward()
        D_optimizer.step()

        # Train Generator
        if it % n_critic == 0:
            G.zero_grad()

            latent_batch = noise_sampler.get_batch(batch_size)

            if conditional:
                z_fake, y_fake = latent_batch[0].to(
                    device), latent_batch[1].to(device)
                x_fake = G(torch.cat([z_fake, y_fake], dim=1))
                fake_pred = D(torch.cat([x_fake, y_fake], dim=1))
            else:
                z_fake = latent_batch.to(device)
                x_fake = G(z_fake)
                fake_pred = D(x_fake)

            if objective == 'gan':
                G_loss = F.binary_cross_entropy(fake_pred, real_target).mean()
                stats['G'].extend([G_loss.item()] * n_critic)
            elif objective == 'wgan':
                G_loss = -fake_pred.mean()
                stats['G'].extend([-G_loss.item()] * n_critic)
            elif objective == 'hinge':
                G_loss = -fake_pred.mean()
                stats['G'].extend([-G_loss.item()] * n_critic)

            G_loss.backward()
            G_optimizer.step()

        if conditional:
            # Train fake classifier
            C_fake.train()

            C_fake.zero_grad()
            C_fake_loss = C_crit(C_fake(x_fake.detach()), y_fake.argmax(1))
            C_fake_loss.backward()
            C_fake_optimizer.step()

            # Train real classifier
            C_real.train()

            C_real.zero_grad()
            C_real_loss = C_crit(C_real(x_real), y_real.argmax(1))
            C_real_loss.backward()
            C_real_optimizer.step()

        if it % 5 == 0:
            C_real.eval()
            C_fake.eval()
            real_correct, fake_correct, total = 0.0, 0.0, 0.0
            for idx, (sample, label) in enumerate(test_dataloader):
                real_correct += (
                    C_real(sample).argmax(1).view(-1) == label).sum()
                fake_correct += (
                    C_fake(sample).argmax(1).view(-1) == label).sum()
                total += sample.shape[0]

            stats['C_it'].append(it)
            stats['C_real'].append(real_correct.item() / total)
            stats['C_fake'].append(fake_correct.item() / total)

            line = f"{it}\t{stats['D'][-1]:.3f}\t{stats['G'][-1]:.3f}"
            if conditional:
                line += f"\t{stats['C_real'][-1]*100:.3f}\t{stats['C_fake'][-1]*100:.3f}"

            print(line, eval_file)

            if plot:
                if conditional:
                    z_fake, y_fake = fixed_latent_batch[0].to(
                        device), fixed_latent_batch[1].to(device)
                    x_fake = G(torch.cat([z_fake, y_fake], dim=1))
                else:
                    z_fake = fixed_latent_batch.to(device)
                    x_fake = G(z_fake)

                generated = x_fake.detach().cpu().numpy()

                plt.figure(0)
                plt.clf()
                plt.scatter(generated[:, 0],
                            generated[:, 1],
                            marker='.',
                            color=(0, 1, 0, 0.01))
                plt.axis('equal')
                plt.xlim(-1, 1)
                plt.ylim(-1, 1)
                plt.savefig(os.path.join(samples_dir, f'{it}.png'))

                plt.figure(1)
                plt.clf()
                plt.plot(stats['G'], label='Generator')
                plt.plot(stats['D'], label='Discriminator')
                plt.legend()
                plt.savefig(os.path.join(results_dir, 'loss.png'))

                if conditional:
                    plt.figure(2)
                    plt.clf()
                    plt.plot(stats['C_it'], stats['C_real'], label='Real')
                    plt.plot(stats['C_it'], stats['C_fake'], label='Fake')
                    plt.legend()
                    plt.savefig(os.path.join(results_dir, 'accuracy.png'))

    save_model(G, os.path.join(network_dir, 'G_trained.pth'))
    save_model(D, os.path.join(network_dir, 'D_trained.pth'))
    save_stats(stats, os.path.join(results_dir, 'stats.pth'))
    if conditional:
        save_model(C_real, os.path.join(network_dir, 'C_real_trained.pth'))
        save_model(C_fake, os.path.join(network_dir, 'C_fake_trained.pth'))
    eval_file.close()