Example #1
def main(opts):
    # Load experiment setting
    config = get_config(opts.config)
    max_iter = config['max_iter']
    # Override the batch size if specified.
    if opts.batch_size != 0:
        config['batch_size'] = opts.batch_size

    trainer = Trainer(config)
    trainer.cuda()
    if opts.multigpus:
        ngpus = torch.cuda.device_count()
        config['gpus'] = ngpus
        print("Number of GPUs: %d" % ngpus)
        trainer.model = torch.nn.DataParallel(trainer.model,
                                              device_ids=range(ngpus))
    else:
        config['gpus'] = 1

    loaders = get_train_loaders(config)
    train_content_loader = loaders[0]
    train_class_loader = loaders[1]
    test_content_loader = loaders[2]
    test_class_loader = loaders[3]

    # Setup logger and output folders
    model_name = os.path.splitext(os.path.basename(opts.config))[0]
    train_writer = SummaryWriter(
        os.path.join(opts.output_path + "/logs", model_name))
    output_directory = os.path.join(opts.output_path + "/outputs", model_name)
    checkpoint_directory, image_directory = make_result_folders(
        output_directory)
    shutil.copy(opts.config, os.path.join(output_directory, 'config.yaml'))

    iterations = trainer.resume(checkpoint_directory,
                                hp=config,
                                multigpus=opts.multigpus) if opts.resume else 0

    # Main training loop: run until max_iter updates have been performed.
    while True:
        for it, (co_data, cl_data) in enumerate(
                zip(train_content_loader, train_class_loader)):
            with Timer("Elapsed time in update: %f"):
                d_acc = trainer.dis_update(co_data, cl_data, config)
                g_acc = trainer.gen_update(co_data, cl_data, config,
                                           opts.multigpus)
                torch.cuda.synchronize()
                print('D acc: %.4f\t G acc: %.4f' % (d_acc, g_acc))

            if (iterations + 1) % config['log_iter'] == 0:
                print("Iteration: %08d/%08d" % (iterations + 1, max_iter))
                write_loss(iterations, trainer, train_writer)

            if ((iterations + 1) % config['image_save_iter'] == 0
                    or (iterations + 1) % config['image_display_iter'] == 0):
                if (iterations + 1) % config['image_save_iter'] == 0:
                    key_str = '%08d' % (iterations + 1)
                    write_html(output_directory + "/index.html",
                               iterations + 1, config['image_save_iter'],
                               'images')
                else:
                    key_str = 'current'
                with torch.no_grad():
                    for t, (val_co_data, val_cl_data) in enumerate(
                            zip(train_content_loader, train_class_loader)):
                        if t >= opts.test_batch_size:
                            break
                        val_image_outputs = trainer.test(
                            val_co_data, val_cl_data, opts.multigpus)
                        write_1images(val_image_outputs, image_directory,
                                      'train_%s_%02d' % (key_str, t))
                    for t, (test_co_data, test_cl_data) in enumerate(
                            zip(test_content_loader, test_class_loader)):
                        if t >= opts.test_batch_size:
                            break
                        test_image_outputs = trainer.test(
                            test_co_data, test_cl_data, opts.multigpus)
                        write_1images(test_image_outputs, image_directory,
                                      'test_%s_%02d' % (key_str, t))

            if (iterations + 1) % config['snapshot_save_iter'] == 0:
                trainer.save(checkpoint_directory, iterations, opts.multigpus)
                print('Saved model at iteration %d' % (iterations + 1))

            iterations += 1
            if iterations >= max_iter:
                print("Finish Training")
                sys.exit(0)
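
The function above reads opts.config, opts.output_path, opts.batch_size, opts.test_batch_size, opts.multigpus, and opts.resume. As a rough sketch (flag names and defaults below are inferred from those attributes, not taken from the original project), a matching command-line front end could look like this:

import argparse

# Hypothetical CLI wrapper for main(opts); flag names mirror the attributes
# accessed above, defaults are illustrative only.
parser = argparse.ArgumentParser()
parser.add_argument('--config', type=str, required=True,
                    help='Path to the YAML experiment configuration.')
parser.add_argument('--output_path', type=str, default='.',
                    help='Root folder for logs/ and outputs/.')
parser.add_argument('--batch_size', type=int, default=0,
                    help='Override the configured batch size (0 keeps the config value).')
parser.add_argument('--test_batch_size', type=int, default=4,
                    help='Number of batches visualized at each image dump.')
parser.add_argument('--multigpus', action='store_true',
                    help='Wrap the model in torch.nn.DataParallel.')
parser.add_argument('--resume', action='store_true',
                    help='Resume from the checkpoints in the output folder.')

if __name__ == '__main__':
    main(parser.parse_args())
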
Example #2
        if ((iterations + 1) % config['image_save_iter'] == 0
                or (iterations + 1) % config['image_display_iter'] == 0):
            if (iterations + 1) % config['image_save_iter'] == 0:
                key_str = '%08d' % (iterations + 1)
                write_html(output_directory + "/index.html", iterations + 1,
                           config['image_save_iter'], 'images')
            else:
                key_str = 'current'
            with torch.no_grad():
                for t, (val_co_data, val_cl_data) in enumerate(
                        zip(train_content_loader, train_class_loader)):
                    if t >= opts.test_batch_size:
                        break
                    val_image_outputs = trainer.test(val_co_data, val_cl_data,
                                                     opts.multigpus)
                    write_1images(val_image_outputs, image_directory,
                                  'train_%s_%02d' % (key_str, t))
                for t, (test_co_data, test_cl_data) in enumerate(
                        zip(test_content_loader, test_class_loader)):
                    if t >= opts.test_batch_size:
                        break
                    test_image_outputs = trainer.test(test_co_data,
                                                      test_cl_data,
                                                      opts.multigpus)
                    write_1images(test_image_outputs, image_directory,
                                  'test_%s_%02d' % (key_str, t))

        if (iterations + 1) % config['snapshot_save_iter'] == 0:
            trainer.save(checkpoint_directory, iterations, opts.multigpus)
            print('Saved model at iteration %d' % (iterations + 1))

        iterations += 1
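
Examples 1 and 3 time each update step with a Timer("Elapsed time in update: %f") context manager whose implementation is not shown here. A minimal sketch, assuming it only prints the elapsed wall-clock time through the given format string, could be:

import time

class Timer:
    # Illustrative stand-in for the Timer helper used above; the real one
    # may differ. It prints the elapsed wall-clock time on exit.
    def __init__(self, msg):
        self.msg = msg  # e.g. "Elapsed time in update: %f"

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        print(self.msg % (time.time() - self.start))
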
Example #3
def main(config, logger):

    print("Start extracting modalities...\n")

    modalities_encoder_trainer = ModalitiesEncoderTrainer(config, logger)
    encoder_first_epoch = modalities_encoder_trainer.load(
        config['logger']['checkpoint_dir']) if config['resume'] else 0
    modalities_encoder_trainer.train(encoder_first_epoch)

    modalities_extraction_loader = get_modalities_extraction_loader(config)
    modalities_extractor = ModalitiesExtractor(config)
    modalities = modalities_extractor.get_modalities(
        modalities_encoder_trainer.model, modalities_extraction_loader)
    modalities_grid = modalities_extractor.get_modalities_grid_image(
        modalities)
    logger.add_image("modality_per_col", modalities_grid, 0)

    # Release the encoder trainer and extractor to free GPU memory before
    # building the translation GAN.
    del modalities_encoder_trainer
    del modalities_extractor
    torch.cuda.empty_cache()

    print(
        "Finished extracting modalities, begin training the translation network...\n"
    )

    train_source_loader, train_ref_loader, test_source_loader, test_ref_loader = get_gan_loaders(
        config, modalities)
    gan_trainer = GANTrainer(config)
    gan_trainer.to(config['device'])

    global_it = gan_trainer.resume(config['logger']['checkpoint_dir'],
                                   config) if config['resume'] else 0
    # Train until max_iter updates; the condition is checked once per full
    # pass over the loaders, so the final pass runs to completion.
    while global_it < config['gan']['max_iter']:
        for it, (source_data, ref_data) in enumerate(
                zip(train_source_loader, train_ref_loader)):
            with Timer("Elapsed time in update: %f"):
                d_acc = gan_trainer.dis_update(source_data, ref_data, config)
                g_acc = gan_trainer.gen_update(source_data, ref_data, config)

                torch.cuda.synchronize(config['device'])

                print('D acc: %.4f\t G acc: %.4f' % (d_acc, g_acc))
                print("Iteration: {curr_iter}/{total_iter}".format(
                    curr_iter=str(global_it + 1).zfill(8),
                    total_iter=str(config['gan']['max_iter']).zfill(8)))

            # Save images for evaluation
            if global_it % config['logger']['eval_every'] == 0:
                with torch.no_grad():
                    for (val_source_data,
                         val_ref_data) in zip(train_source_loader,
                                              train_ref_loader):
                        val_image_outputs = gan_trainer.test(
                            val_source_data, val_ref_data)
                        write_1images(val_image_outputs,
                                      config['logger']['image_dir'],
                                      'train_{iter}'.format(iter=global_it))
                        save_image_tb(val_image_outputs, "train", global_it,
                                      logger)
                        break
                    for (test_source_data,
                         test_ref_data) in zip(test_source_loader,
                                               test_ref_loader):
                        test_image_outputs = gan_trainer.test(
                            test_source_data, test_ref_data)
                        write_1images(test_image_outputs,
                                      config['logger']['image_dir'],
                                      'test_{iter}'.format(iter=global_it))
                        save_image_tb(test_image_outputs, "test", global_it,
                                      logger)
                        break

            # Log losses
            if global_it % config['logger']['log_loss'] == 0:
                write_loss(global_it, gan_trainer, logger)

            # Save checkpoint
            if global_it % config['logger']['checkpoint_gan_every'] == 0:
                gan_trainer.save(config['logger']['checkpoint_dir'], global_it)

            global_it += 1

    print("Finished training!")