import itertools as itt
import logging
import time
from pathlib import Path

import numpy as np
import tensorflow as tf
import tqdm

# These examples also rely on project-local helpers that are assumed to be
# defined elsewhere: get_fixed_random, generate_images, train_epoch,
# get_forward_pass_fn, get_train_step_fn, and the tfgan_mnist metrics module.


def train(config, gen, disc_f, disc_h, disc_j, model_en, train_data):

    # Define optimizers
    disc_optimizer = tf.optimizers.Adam(learning_rate=config.lr_disc,
                                        beta_1=config.beta_1_disc,
                                        beta_2=config.beta_2_disc)

    gen_en_optimizer = tf.optimizers.Adam(learning_rate=config.lr_gen_en,
                                          beta_1=config.beta_1_gen_en,
                                          beta_2=config.beta_2_gen_en)

    # Define Logging to Tensorboard
    summary_writer = tf.summary.create_file_writer(
        f'{config.result_path}/{config.model}_{config.dataset}_{time.strftime("%Y-%m-%d--%H-%M-%S")}'
    )

    fixed_z, fixed_c = get_fixed_random(
        config,
        num_to_generate=100)  # fixed_noise is just used for visualization.

    # Define metric
    metric_loss_gen_en = tf.keras.metrics.Mean()
    metric_loss_disc = tf.keras.metrics.Mean()

    # Start training
    epoch_tf = tf.Variable(0, trainable=False, dtype=tf.float32)
    for epoch in range(config.num_epochs):
        logging.info(f'Start epoch {epoch + 1} ...')
        epoch_tf.assign(epoch)
        start_time = time.time()

        train_epoch(train_data, gen, disc_f, disc_h, disc_j, model_en,
                    disc_optimizer, gen_en_optimizer, metric_loss_disc,
                    metric_loss_gen_en, config.train_batch_size,
                    config.num_cont_noise, config)
        epoch_time = time.time() - start_time

        # Save results
        logging.info(
            f'Epoch {epoch + 1}: Disc_loss: {metric_loss_disc.result()}, '
            f'Gen_loss: {metric_loss_gen_en.result()}, Time: {epoch_time}')
        with summary_writer.as_default():
            tf.summary.scalar('Generator and Encoder loss',
                              metric_loss_gen_en.result(),
                              step=epoch)
            tf.summary.scalar('Discriminator loss',
                              metric_loss_disc.result(),
                              step=epoch)

        metric_loss_gen_en.reset_states()
        metric_loss_disc.reset_states()
        # Log generated images
        gen_image = generate_images(gen, fixed_z, fixed_c, config)
        with summary_writer.as_default():
            tf.summary.image('Generated Images',
                             tf.expand_dims(gen_image, axis=0),
                             step=epoch)
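

# A minimal sketch of the get_fixed_random(config, num_to_generate) helper
# these examples assume. The body below is only one plausible implementation
# (Gaussian continuous noise plus one-hot class codes), not the project's
# own; num_classes is an assumed config attribute.
def get_fixed_random_sketch(config, num_to_generate=100):
    # Continuous noise z ~ N(0, 1); width taken from config.num_cont_noise.
    z = tf.random.normal([num_to_generate, config.num_cont_noise])
    # One-hot class codes c, cycling over the classes.
    num_classes = getattr(config, 'num_classes', 10)
    c = tf.one_hot(tf.range(num_to_generate) % num_classes, num_classes)
    return z, c
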
def train(config,
          gen,
          disc_f,
          disc_h,
          disc_j,
          model_en,
          train_data,
          train_data_repeat,
          model_copies=None):

    if config.load_model or config.do_eval:
        if config.load_from_further:
            config_dir = Path('~/models/dg_further/configs').expanduser()
        else:
            config_dir = Path('~/models/dg/exp/configs').expanduser()
        for cdir in config_dir.iterdir():
            ldir = cdir / 'logs'
            try:
                cmdline = (ldir / 'cmdlog.txt').read_text()
                cparams = dict(k[2:].split('=') for k in cmdline.split()
                               if k.startswith('--'))
                print(cparams)
                if (cparams['dataset'] == config.dataset
                        and cparams['train_dg'] == str(config.train_dg)
                        and cparams['steps_dg'] == str(config.steps_dg)
                        and cparams['conditional'] == str(config.conditional)):
                    checkpoint_dir = list(ldir.glob('*/checkpoints'))[0]
                    break
            except (OSError, KeyError, ValueError, IndexError):
                # Skip runs without a readable cmdlog or matching params.
                continue
        else:
            raise ValueError('Model not found')

    # Pull one batch up front so the forward-pass and train-step functions
    # below can be traced once before training (warm-up calls).
    train_data_iterator = iter(train_data_repeat)
    image, label = next(train_data_iterator)

    if config.train_dg or config.do_eval or config.load_model:
        forward_pass_fn_d_const = get_forward_pass_fn()
        forward_pass_fn_g_const = get_forward_pass_fn()
        forward_pass_fn_d_const(image,
                                label,
                                model_copies[gen],
                                disc_f,
                                disc_h,
                                disc_j,
                                model_copies[model_en],
                                config.train_batch_size,
                                config.num_cont_noise,
                                config,
                                update_d=False)
        forward_pass_fn_g_const(image,
                                label,
                                gen,
                                model_copies[disc_f],
                                model_copies[disc_h],
                                model_copies[disc_j],
                                model_en,
                                config.train_batch_size,
                                config.num_cont_noise,
                                config,
                                update_g=False)
    else:
        forward_pass_fn_d_const = get_forward_pass_fn()
        forward_pass_fn_d_const(image, label, gen, disc_f, disc_h, disc_j,
                                model_en, config.train_batch_size,
                                config.num_cont_noise, config)
        forward_pass_fn_g_const = None

    # Define optimizers
    disc_optimizer = tf.optimizers.Adam(learning_rate=config.lr_disc,
                                        beta_1=config.beta_1_disc,
                                        beta_2=config.beta_2_disc)

    gen_en_optimizer = tf.optimizers.Adam(learning_rate=config.lr_gen_en,
                                          beta_1=config.beta_1_gen_en,
                                          beta_2=config.beta_2_gen_en)

    dg_optimizer_d = tf.optimizers.Adam(learning_rate=config.lr_dg,
                                        beta_1=config.beta_1_disc,
                                        beta_2=config.beta_2_disc)

    dg_optimizer_g = tf.optimizers.Adam(learning_rate=config.lr_dg,
                                        beta_1=config.beta_1_gen_en,
                                        beta_2=config.beta_2_gen_en)

    out_dir = f'{config.result_path}/{config.model}_{config.dataset}_{time.strftime("%Y-%m-%d--%H-%M-%S")}'
    # Define Logging to Tensorboard
    summary_writer = tf.summary.create_file_writer(out_dir)

    fixed_z, fixed_c = get_fixed_random(
        config,
        num_to_generate=100)  # fixed_noise is just used for visualization.

    # Define metric
    metric_loss_gen_en = tf.keras.metrics.Mean()
    metric_loss_disc = tf.keras.metrics.Mean()
    metric_loss_dg = tf.keras.metrics.Mean()

    if config.train_dg or config.do_eval or config.load_model:
        train_step_fn_d_const = get_train_step_fn(forward_pass_fn_d_const)
        train_step_fn_g_const = get_train_step_fn(forward_pass_fn_g_const)
        train_step_fn_d_const(image,
                              label,
                              model_copies[gen],
                              disc_f,
                              disc_h,
                              disc_j,
                              model_copies[model_en],
                              disc_optimizer,
                              gen_en_optimizer,
                              metric_loss_disc,
                              metric_loss_gen_en,
                              config.train_batch_size,
                              config.num_cont_noise,
                              config,
                              update_d=False)
        train_step_fn_g_const(image,
                              label,
                              gen,
                              model_copies[disc_f],
                              model_copies[disc_h],
                              model_copies[disc_j],
                              model_en,
                              disc_optimizer,
                              gen_en_optimizer,
                              metric_loss_disc,
                              metric_loss_gen_en,
                              config.train_batch_size,
                              config.num_cont_noise,
                              config,
                              update_g=False)
    else:
        train_step_fn_d_const = get_train_step_fn(forward_pass_fn_d_const)
        train_step_fn_d_const(image, label, gen, disc_f, disc_h, disc_j,
                              model_en, disc_optimizer, gen_en_optimizer,
                              metric_loss_disc, metric_loss_gen_en,
                              config.train_batch_size, config.num_cont_noise,
                              config)
        train_step_fn_g_const = None

    ckpt = tf.train.Checkpoint(gen=gen,
                               disc_f=disc_f,
                               disc_h=disc_h,
                               disc_j=disc_j,
                               model_en=model_en)
    if config.save_model:
        ckpt_mgr = tf.train.CheckpointManager(ckpt, out_dir + '/checkpoints/',
                                              config.num_epochs + 1)
        ckpt_mgr.save(0)
    if config.load_model and not config.do_eval:
        ckpt.restore(tf.train.latest_checkpoint(
            str(checkpoint_dir))).assert_consumed()

    # Start training
    epoch_tf = tf.Variable(0, trainable=False, dtype=tf.float32)
    # One extra iteration in eval mode so every saved checkpoint
    # (0..num_epochs) is evaluated.
    for epoch in range(config.num_epochs + int(config.do_eval)):
        logging.info(f'Start epoch {epoch + 1} ...')
        epoch_tf.assign(epoch)
        start_time = time.time()

        if config.do_eval:
            ckpt.restore(str(checkpoint_dir) +
                         f'/ckpt-{epoch}').assert_consumed()

        train_epoch(train_step_fn_d_const,
                    train_step_fn_g_const,
                    forward_pass_fn_d_const,
                    forward_pass_fn_g_const,
                    train_data,
                    train_data_iterator,
                    gen,
                    disc_f,
                    disc_h,
                    disc_j,
                    model_en,
                    disc_optimizer,
                    gen_en_optimizer,
                    dg_optimizer_g,
                    dg_optimizer_d,
                    metric_loss_disc,
                    metric_loss_gen_en,
                    metric_loss_dg,
                    config.train_batch_size,
                    config.num_cont_noise,
                    config,
                    model_copies=model_copies)
        epoch_time = time.time() - start_time

        # Save results
        logging.info(
            f'Epoch {epoch + 1}: Disc_loss: {metric_loss_disc.result()}, '
            f'Gen_loss: {metric_loss_gen_en.result()}, '
            f'Time: {epoch_time}, DG: {metric_loss_dg.result()}')
        with summary_writer.as_default():
            tf.summary.scalar('Generator and Encoder loss',
                              metric_loss_gen_en.result(),
                              step=epoch)
            tf.summary.scalar('Discriminator loss',
                              metric_loss_disc.result(),
                              step=epoch)
            tf.summary.scalar('Duality Gap',
                              metric_loss_dg.result(),
                              step=epoch)

        metric_loss_gen_en.reset_states()
        metric_loss_disc.reset_states()
        metric_loss_dg.reset_states()
        # Generate images
        gen_image = generate_images(gen, fixed_z, fixed_c, config)
        with summary_writer.as_default():
            tf.summary.image('Generated Images',
                             tf.expand_dims(gen_image, axis=0),
                             step=epoch)
        if config.save_model:
            ckpt_mgr.save(epoch + 1)
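
# A minimal sketch of how the 'Duality Gap' logged above is typically
# estimated: starting from the current (G, D), train a copy of D with G
# frozen to approximate max_D' f(G, D'), train a copy of G with D frozen to
# approximate min_G' f(G', D), and report the difference. The frozen
# model_copies and the update_d=False / update_g=False flags above fit this
# recipe. `f` here is an assumed scalar value function; this is only an
# illustration, not this project's implementation.
def duality_gap_sketch(f, gen, disc, gen_copy, disc_copy, opt_d, opt_g,
                       steps):
    for _ in range(steps):  # approximate the worst-case discriminator
        with tf.GradientTape() as tape:
            loss_d = -f(gen, disc_copy)  # ascend f in the D copy
        grads = tape.gradient(loss_d, disc_copy.trainable_variables)
        opt_d.apply_gradients(zip(grads, disc_copy.trainable_variables))
    for _ in range(steps):  # approximate the worst-case generator
        with tf.GradientTape() as tape:
            loss_g = f(gen_copy, disc)  # descend f in the G copy
        grads = tape.gradient(loss_g, gen_copy.trainable_variables)
        opt_g.apply_gradients(zip(grads, gen_copy.trainable_variables))
    return f(gen, disc_copy) - f(gen_copy, disc)
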
# Example #3

def train(config,
          gen,
          disc_f,
          disc_h,
          disc_j,
          model_en,
          train_data,
          test_data,
          train_data_repeat,
          model_copies=None):
    if config.eval_metrics:

        @tf.function
        def get_mnist_eval_metrics(real, fake):
            frechet = tfgan_mnist.mnist_frechet_distance(real, fake, 1)
            score = tfgan_mnist.mnist_score(fake, 1)
            return tf.stack(list(map(tf.stop_gradient, (frechet, score))))

    if config.load_model or config.do_eval:
        if config.load_from_further:
            # config_dir = Path('~/models/dg_further/configs').expanduser()
            config_dir = Path('~/models/dg_further_2/configs').expanduser()
        else:
            config_dir = Path('~/models/dg/exp/configs').expanduser()
        for cdir in config_dir.iterdir():
            ldir = cdir / 'logs'
            try:
                cmdline = (ldir / 'cmdlog.txt').read_text()
                cparams = dict(k[2:].split('=') for k in cmdline.split()
                               if k.startswith('--'))
                print(cparams)
                if (
                        cparams['dataset'] == config.dataset and
                        cparams['train_dg'] == str(config.train_dg) and
                        cparams['steps_dg'] == str(config.steps_dg) and
                        cparams['conditional'] == str(config.conditional)
                        ):
                    checkpoint_dir = list(ldir.glob('*/checkpoints'))[0]
                    break
            except (OSError, KeyError, ValueError, IndexError):
                # Skip runs without a readable cmdlog or matching params.
                continue
        else:
            raise ValueError('Model not found: {}'.format(
                [config.dataset, config.train_dg, config.steps_dg,
                 config.conditional]))

    # Pull one batch up front so the forward-pass and train-step functions
    # below can be traced once before training (warm-up calls).
    train_data_iterator = iter(train_data_repeat)
    image, label = next(train_data_iterator)

    if config.train_dg or config.do_eval or config.load_model:
        forward_pass_fn_d_const = get_forward_pass_fn()
        forward_pass_fn_g_const = get_forward_pass_fn()
        forward_pass_fn_d_const(image, label, model_copies[gen], disc_f,
                                disc_h, disc_j, model_copies[model_en],
                                config.train_batch_size,
                                config.num_cont_noise, config, update_d=False)
        forward_pass_fn_g_const(image, label, gen, model_copies[disc_f],
                                model_copies[disc_h], model_copies[disc_j],
                                model_en, config.train_batch_size,
                                config.num_cont_noise, config, update_g=False)
    else:
        forward_pass_fn_d_const = get_forward_pass_fn()
        forward_pass_fn_d_const(image, label, gen, disc_f, disc_h, disc_j,
                                model_en, config.train_batch_size,
                                config.num_cont_noise, config)
        forward_pass_fn_g_const = None

    # Define optimizers
    disc_optimizer = tf.optimizers.Adam(learning_rate=config.lr_disc,
                                        beta_1=config.beta_1_disc,
                                        beta_2=config.beta_2_disc)

    gen_en_optimizer = tf.optimizers.Adam(learning_rate=config.lr_gen_en,
                                          beta_1=config.beta_1_gen_en,
                                          beta_2=config.beta_2_gen_en)

    dg_optimizer_d = tf.optimizers.Adam(learning_rate=config.lr_dg,
                                        beta_1=config.beta_1_disc,
                                        beta_2=config.beta_2_disc)

    dg_optimizer_g = tf.optimizers.Adam(learning_rate=config.lr_dg,
                                        beta_1=config.beta_1_gen_en,
                                        beta_2=config.beta_2_gen_en)

    out_dir = f'{config.result_path}/{config.model}_{config.dataset}_{time.strftime("%Y-%m-%d--%H-%M-%S")}'
    # Define Logging to Tensorboard
    summary_writer = tf.summary.create_file_writer(out_dir)

    fixed_z, fixed_c = get_fixed_random(
        config,
        num_to_generate=100)  # fixed_noise is just used for visualization.

    # Define metric
    metric_loss_gen_en = tf.keras.metrics.Mean()
    metric_loss_disc = tf.keras.metrics.Mean()
    metric_loss_dg = tf.keras.metrics.Mean()

    if config.train_dg or config.do_eval or config.load_model:
        train_step_fn_d_const = get_train_step_fn(forward_pass_fn_d_const)
        train_step_fn_g_const = get_train_step_fn(forward_pass_fn_g_const)
        train_step_fn_d_const(image, label, model_copies[gen], disc_f, disc_h,
                              disc_j, model_copies[model_en], disc_optimizer,
                              gen_en_optimizer, metric_loss_disc,
                              metric_loss_gen_en, config.train_batch_size,
                              config.num_cont_noise, config, update_d=False)
        train_step_fn_g_const(image, label, gen, model_copies[disc_f],
                              model_copies[disc_h], model_copies[disc_j],
                              model_en, disc_optimizer, gen_en_optimizer,
                              metric_loss_disc, metric_loss_gen_en,
                              config.train_batch_size, config.num_cont_noise,
                              config, update_g=False)
    else:
        train_step_fn_d_const = get_train_step_fn(forward_pass_fn_d_const)
        train_step_fn_d_const(image, label, gen, disc_f, disc_h, disc_j,
                              model_en, disc_optimizer, gen_en_optimizer,
                              metric_loss_disc, metric_loss_gen_en,
                              config.train_batch_size, config.num_cont_noise,
                              config)
        train_step_fn_g_const = None

    ckpt = tf.train.Checkpoint(gen=gen,
                               disc_f=disc_f,
                               disc_h=disc_h,
                               disc_j=disc_j,
                               model_en=model_en)
    if config.save_model:
        ckpt_mgr = tf.train.CheckpointManager(ckpt, out_dir + '/checkpoints/',
                                              config.num_epochs + 1)
        ckpt_mgr.save(0)
    if config.load_model and not config.do_eval:
        ckpt.restore(tf.train.latest_checkpoint(
            str(checkpoint_dir))).assert_consumed()

    # Start training
    epoch_tf = tf.Variable(0, trainable=False, dtype=tf.float32)
    # One extra iteration in eval mode so every saved checkpoint
    # (0..num_epochs) is evaluated.
    for epoch in range(config.num_epochs + int(config.do_eval)):
        logging.info(f'Start epoch {epoch + 1} ...')
        epoch_tf.assign(epoch)
        start_time = time.time()

        if config.do_eval:
            ckpt.restore(str(checkpoint_dir) +
                         f'/ckpt-{epoch}').assert_consumed()

        datasets = [(train_data, 'train')]
        if config.do_eval:
            datasets.append((test_data, 'test'))
        for dataset, dataset_name in datasets:
            if config.eval_metrics:
                if config.only_eval_last and epoch < config.num_epochs:
                    continue
                num_points_eval = 10000 if config.only_eval_last else 1000
                if config.debug:
                    num_points_eval = 100
                eval_metrics_batches = num_points_eval // config.train_batch_size
                num_points_eval = eval_metrics_batches * config.train_batch_size
                real_images = []
                fake_images = []
                frechet = []
                score = []
                for batch in tqdm.tqdm(itt.islice(dataset, 0,
                                                  eval_metrics_batches),
                                       total=eval_metrics_batches):
                    z, c = get_fixed_random(
                        config, num_to_generate=config.train_batch_size)
                    fake = generate_images(gen, z, c, config, do_plot=False)
                    real = batch[0]
                    real, fake = map(lambda x: x * 2. - 1., (real, fake))
                    if len(real_images) < 200 or config.only_save:
                        real_images.extend([np.array(i) for i in real])
                        fake_images.extend([np.array(i) for i in fake])
                    if config.only_save:
                        continue
                    if config.dataset == 'mnist':
                        real, fake = map(
                            lambda x: tf.image.resize(x, [28, 28]),
                            (real, fake))
                        f, s = get_mnist_eval_metrics(real, fake)
                        frechet.append(f)
                        score.append(s)
                    elif config.dataset == 'cifar10':
                        frechet.append(0.)
                        score.append(0.)
                    elif config.dataset == 'fashion_mnist':
                        frechet.append(0.)
                        score.append(0.)

                frechet = sum(frechet) / max(1, len(frechet))
                score = sum(score) / max(1, len(score))

                with summary_writer.as_default():
                    tf.summary.scalar(f'Frechet Distance ({dataset_name})',
                                      frechet,
                                      step=epoch)
                    tf.summary.scalar(f'Score ({dataset_name})',
                                      score,
                                      step=epoch)

                real_images, fake_images = map(np.array,
                                               (real_images, fake_images))
                real_images, fake_images = map(
                    lambda x: ((x + 1.) / 2. * 255.).round().astype(np.uint8),
                    (real_images, fake_images))
                np.save(f'logs/real-{epoch}.npy', real_images)
                np.save(f'logs/fake-{epoch}.npy', fake_images)

                continue

            train_epoch(train_step_fn_d_const, train_step_fn_g_const,
                        forward_pass_fn_d_const, forward_pass_fn_g_const,
                        train_data, train_data_iterator, gen, disc_f, disc_h,
                        disc_j, model_en, disc_optimizer, gen_en_optimizer,
                        dg_optimizer_g, dg_optimizer_d, metric_loss_disc,
                        metric_loss_gen_en, metric_loss_dg,
                        config.train_batch_size, config.num_cont_noise,
                        config, model_copies=model_copies)
            epoch_time = time.time() - start_time

            # Save results
            logging.info(
                f'{dataset_name} - Epoch {epoch + 1}: '
                f'Disc_loss: {metric_loss_disc.result()}, '
                f'Gen_loss: {metric_loss_gen_en.result()}, '
                f'Time: {epoch_time}, DG: {metric_loss_dg.result()}')
            with summary_writer.as_default():
                tf.summary.scalar(
                    f'Generator and Encoder loss ({dataset_name})',
                    metric_loss_gen_en.result(),
                    step=epoch)
                tf.summary.scalar(f'Discriminator loss ({dataset_name})',
                                  metric_loss_disc.result(),
                                  step=epoch)
                tf.summary.scalar(f'Duality Gap ({dataset_name})',
                                  metric_loss_dg.result(),
                                  step=epoch)

            metric_loss_gen_en.reset_states()
            metric_loss_disc.reset_states()
            metric_loss_dg.reset_states()
            # Generate images
            gen_image = generate_images(gen, fixed_z, fixed_c, config)
            with summary_writer.as_default():
                tf.summary.image('Generated Images',
                                 tf.expand_dims(gen_image, axis=0),
                                 step=epoch)
        if config.save_model:
            ckpt_mgr.save(epoch + 1)
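
# A minimal sketch for reading back the uint8 image dumps written by the
# example above, e.g. for offline scoring. The paths mirror its np.save
# calls, and the rescaling inverts the ((x + 1) / 2 * 255) mapping applied
# before saving.
def load_image_dump_sketch(epoch):
    real = np.load(f'logs/real-{epoch}.npy')  # uint8, shape (N, H, W, C)
    fake = np.load(f'logs/fake-{epoch}.npy')

    def to_float(x):  # back to the [-1, 1] range the metrics consumed
        return x.astype(np.float32) / 255. * 2. - 1.

    return to_float(real), to_float(fake)
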
# Example #4

def train(config, gen, disc_f, disc_h, disc_j, model_en, train_data):
    # Define optimizers
    disc_optimizer = tf.optimizers.Adam(learning_rate=config.lr_disc,
                                        beta_1=config.beta_1_disc,
                                        beta_2=config.beta_2_disc)

    gen_en_optimizer = tf.optimizers.Adam(learning_rate=config.lr_gen_en,
                                          beta_1=config.beta_1_gen_en,
                                          beta_2=config.beta_2_gen_en)

    # Compute the output directory once so the summary writer and the weight
    # snapshots below share the same timestamp.
    basedir = f'{config.result_path}/{config.model}_{config.dataset}_{time.strftime("%Y-%m-%d--%H-%M-%S")}'
    # Define Logging to Tensorboard
    summary_writer = tf.summary.create_file_writer(basedir)

    fixed_z, fixed_c = get_fixed_random(
        config,
        num_to_generate=100)  # fixed_noise is just used for visualization.

    # Identity "generator": passes real images through the same
    # generate_images plotting path used for generated samples below.
    def fake_gene(x, *args, **kwargs):
        return x

    for image, label in train_data:
        image, image_aug = tf.split(image, 2, axis=-1)
        real_image = generate_images(fake_gene, image[:100], label[:100],
                                     config)
        with summary_writer.as_default():
            tf.summary.image('Real Images',
                             tf.expand_dims(real_image, axis=0),
                             step=0)
        break

    # Define metric
    metric_loss_gen_en = tf.keras.metrics.Mean()
    metric_loss_disc = tf.keras.metrics.Mean()
    # Start training
    epoch_tf = tf.Variable(0, trainable=False, dtype=tf.float32)
    for epoch in range(config.num_epochs):
        logging.info(f'Start epoch {epoch + 1} ...')
        epoch_tf.assign(epoch)
        start_time = time.time()

        train_epoch(train_data, gen, disc_f, disc_h, disc_j, model_en,
                    disc_optimizer, gen_en_optimizer, metric_loss_disc,
                    metric_loss_gen_en, config.train_batch_size,
                    config.num_cont_noise, config)
        epoch_time = time.time() - start_time

        # Save results
        logging.info(
            f'Epoch {epoch + 1}: Disc_loss: {metric_loss_disc.result()}, '
            f'Gen_loss: {metric_loss_gen_en.result()}, Time: {epoch_time}')
        with summary_writer.as_default():
            tf.summary.scalar('Generator and Encoder loss',
                              metric_loss_gen_en.result(),
                              step=epoch)
            tf.summary.scalar('Discriminator loss',
                              metric_loss_disc.result(),
                              step=epoch)

        metric_loss_gen_en.reset_states()
        metric_loss_disc.reset_states()
        # Log generated images
        gen_image = generate_images(gen, fixed_z, fixed_c, config)
        with summary_writer.as_default():
            tf.summary.image('Generated Images',
                             tf.expand_dims(gen_image, axis=0),
                             step=epoch)

        if epoch % 25 == 0 or epoch == config.num_epochs - 1:
            for model, name in zip(
                [gen, disc_f, disc_h, disc_j, model_en],
                ['gen', 'disc_f', 'disc_h', 'disc_j', 'model_en']):
                model.save_weights(f'{basedir}/{name}_{epoch}')
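

# A minimal sketch for restoring the per-epoch weights saved by the example
# above. Keras' save_weights with a bare path prefix writes TF-format
# checkpoints, so freshly built models with matching architectures can
# reload them from the same prefix via load_weights.
def restore_weights_sketch(basedir, epoch, gen, disc_f, disc_h, disc_j,
                           model_en):
    for model, name in zip([gen, disc_f, disc_h, disc_j, model_en],
                           ['gen', 'disc_f', 'disc_h', 'disc_j', 'model_en']):
        model.load_weights(f'{basedir}/{name}_{epoch}')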