def main(args):
    print(args)
    print('starting on', platform.node())
    if 'CUDA_VISIBLE_DEVICES' in os.environ:
        print('cuda gpus:', os.environ['CUDA_VISIBLE_DEVICES'])
    main_stream = torch.cuda.Stream()

    if args.fp16:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."
        print('Got fp16!')

    taskonomy_loss, losses, criteria, taskonomy_tasks = get_losses_and_tasks(args)
    print("including the following tasks:", list(losses.keys()))

    criteria2 = {'Loss': taskonomy_loss}
    for key, value in criteria.items():
        criteria2[key] = value
    criteria = criteria2

    print('data_dir =', args.data_dir, len(args.data_dir))
    augment = not args.no_augment

    train_dataset = TaskonomyLoader(
        args.data_dir,
        label_set=taskonomy_tasks,
        model_whitelist='train_models.txt',
        model_limit=args.model_limit,
        output_size=(args.image_size, args.image_size),
        half_sized_output=args.half_sized_output,
        augment=augment)
    print('Found', len(train_dataset), 'training instances.')

    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch](tasks=losses.keys(),
                                       half_sized_output=args.half_sized_output)

    def get_n_params(model):
        # Count the total number of scalar parameters in a module.
        pp = 0
        for p in list(model.parameters()):
            # print(p.size())
            nn = 1
            for s in list(p.size()):
                nn = nn * s
            pp += nn
        return pp

    print("Model has", get_n_params(model), "parameters")
    try:
        print("Encoder has", get_n_params(model.encoder), "parameters")
        # flops, params = get_model_complexity_info(model.encoder, (3, 256, 256),
        #                                           as_strings=False, print_per_layer_stat=False)
        # print("Encoder has", flops, "Flops and", params, "parameters,")
    except:
        print("Each encoder has", get_n_params(model.encoders[0]), "parameters")
    for decoder in model.task_to_decoder.values():
        print("Decoder has", get_n_params(decoder), "parameters")

    model = model.cuda()

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # Tested with AdamW; poor results observed.
    # optimizer = adamW.AdamW(model.parameters(), lr=args.lr,
    #                         weight_decay=args.weight_decay, eps=1e-3)

    # Initialize Amp. Amp accepts either values or strings for the optional override
    # arguments, for convenient interoperation with argparse.
    if args.fp16:
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level='O1',
                                          loss_scale="dynamic",
                                          verbosity=0)
        print('Got fp16!')
    # args.lr = args.lr * float(args.batch_size * args.virtual_batch_multiplier) / 256.

    # Optionally resume from a checkpoint.
    checkpoint = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume,
                                    map_location=lambda storage, loc: storage.cuda())
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.pretrained != '':
        print('loading pretrained weights for ' + args.arch + ' (' + args.pretrained + ')')
        model.encoder.load_state_dict(torch.load(args.pretrained))

    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model).cuda()
        if args.sync_batch_norm:
            from sync_batchnorm import patch_replication_callback
            patch_replication_callback(model)

    print('Virtual batch size =', args.batch_size * args.virtual_batch_multiplier)

    if args.resume:
        if os.path.isfile(args.resume) and 'optimizer' in checkpoint:
            optimizer.load_state_dict(checkpoint['optimizer'])

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
        sampler=None)
    val_loader = get_eval_loader(args.data_dir, taskonomy_tasks, args)

    trainer = Trainer(train_loader, val_loader, model, optimizer, criteria, args, checkpoint)

    if args.validate:
        trainer.progress_table = []
        trainer.validate([{}])
        print()
        return

    if args.test:
        trainer.progress_table = []
        # Replace the val loader with a loader that loads test data.
        trainer.val_loader = get_eval_loader(args.data_dir, taskonomy_tasks, args,
                                             model_limit=(1000, 2000))
        trainer.validate([{}])
        return

    trainer.train()
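# A minimal sketch (not the repository's actual Trainer, which is defined
# elsewhere) of how a training step would use the Apex amp handle initialized
# above with opt_level='O1'. The model/criterion/optimizer names below are
# placeholders for illustration only.
from apex import amp  # assumes NVIDIA Apex is installed, as main() above does


def fp16_training_step(model, optimizer, criterion, inputs, targets, fp16=True):
    optimizer.zero_grad()
    loss = criterion(model(inputs), targets)
    if fp16:
        # Scale the loss so fp16 gradients do not underflow, then backprop
        # through the scaled loss as recommended by the apex.amp API.
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
    else:
        loss.backward()
    optimizer.step()
    return loss.item()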
def run(config):
    # Update the config dict as necessary.
    # This is for convenience, to add settings derived from the user-specified
    # configuration into the config-dict (e.g. inferring the number of classes
    # and size of the images from the dataset, passing in a pytorch object
    # for the activation specified as a string).
    config['resolution'] = utils.imsize_dict[config['dataset']]
    config['n_classes'] = utils.nclass_dict[config['dataset']] * config['cluster_per_class']
    config['G_activation'] = utils.activation_dict[config['G_nl']]
    config['D_activation'] = utils.activation_dict[config['D_nl']]
    if config['is_encoder']:
        config['E_fp16'] = float(config['D_fp16'])
        config['num_E_accumulations'] = int(config['num_D_accumulations'])
        config['dataset_channel'] = utils.channel_dict[config['dataset']]
        config['lambda_encoder'] = config['resolution'] ** 2 * config['dataset_channel']
    # By default, skip init if resuming training.
    if config['resume']:
        print('Skipping initialization for training resumption...')
        config['skip_init'] = True
    config = utils.update_config_roots(config)
    device = 'cuda'

    # Seed RNG
    utils.seed_rng(config['seed'])
    # Prepare root folders if necessary
    utils.prepare_root(config)
    # Setup cudnn.benchmark for free speed
    torch.backends.cudnn.benchmark = True

    # Import the model--this line allows us to dynamically select different files.
    model = __import__(config['model'])
    experiment_name = (config['experiment_name'] if config['experiment_name']
                       else utils.name_from_config(config))
    print('Experiment name is %s' % experiment_name)

    # Next, build the model
    G = model.Generator(**config).to(device)
    D = model.Discriminator(**config).to(device)
    if config['is_encoder']:
        E = model.Encoder(**{**config, 'D': D}).to(device)
    Prior = layers.Prior(**config).to(device)

    # If using EMA, prepare it
    if config['ema']:
        print('Preparing EMA for G with decay of {}'.format(config['ema_decay']))
        G_ema = model.Generator(**{**config, 'skip_init': True, 'no_optim': True}).to(device)
        ema = utils.ema(G, G_ema, config['ema_decay'], config['ema_start'])
    else:
        G_ema, ema = None, None

    # FP16?
    if config['G_fp16']:
        print('Casting G to float16...')
        G = G.half()
        if config['ema']:
            G_ema = G_ema.half()
        if not config['prior_type'] == 'default':
            Prior = Prior.half()
    if config['D_fp16']:
        print('Casting D to fp16...')
        D = D.half()
        # Consider automatically reducing SN_eps?
    if config['is_encoder'] and config['E_fp16']:
        print('Casting E to fp16...')
        E = E.half()

    print(G)
    print(D)
    if config['is_encoder']:
        print(E)
    print(Prior)

    if not config['is_encoder']:
        GD = model.G_D(G, D)
        print('Number of params in G: {} D: {}'.format(
            *[sum([p.data.nelement() for p in net.parameters()]) for net in [G, D]]))
    else:
        GD = model.G_D(G, D, E, Prior)
        GE = model.G_E(G, E, Prior)
        print('Number of params in G: {} D: {} E: {}'.format(
            *[sum([p.data.nelement() for p in net.parameters()]) for net in [G, D, E]]))

    # Prepare state dict, which holds things like epoch # and itr #
    # TODO: also track the reconstruction error, discriminator loss and generator loss?
    state_dict = {'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0,
                  'best_IS': 0, 'best_FID': 999999, 'best_error_rec': 99999,
                  'config': config}

    # If loading from a pre-trained model, load weights
    if config['resume']:
        print('Loading weights...')
        utils.load_weights(
            G, D, state_dict, config['weights_root'], experiment_name,
            config['load_weights'] if config['load_weights'] else None,
            G_ema if config['ema'] else None,
            E=None if not config['is_encoder'] else E,
            Prior=Prior if not config['prior_type'] == 'default' else None)

    # If parallel, parallelize the GD module
    if config['parallel']:
        GD = nn.DataParallel(GD)
        if config['cross_replica']:
            patch_replication_callback(GD)
    # If parallel, parallelize the GE module
    # if config['parallel'] and config['is_encoder']:
    #     GE = nn.DataParallel(GE)
    #     if config['cross_replica']:
    #         patch_replication_callback(GE)

    # Prepare loggers for stats; metrics holds test metrics,
    # lmetrics holds any desired training metrics.
    test_metrics_fname = '%s/%s_log.jsonl' % (config['logs_root'], experiment_name)
    train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name)
    print('Inception Metrics will be saved to {}'.format(test_metrics_fname))
    test_log = utils.MetricsLogger(test_metrics_fname,
                                   reinitialize=(not config['resume']))
    print('Training Metrics will be saved to {}'.format(train_metrics_fname))
    train_log = utils.MyLogger(train_metrics_fname,
                               reinitialize=(not config['resume']),
                               logstyle=config['logstyle'])
    # Write metadata
    utils.write_metadata(config['logs_root'], experiment_name, config, state_dict)

    # Prepare data; the Discriminator's batch size is all that needs to be passed
    # to the dataloader, as G doesn't require dataloading.
    # Note that at every loader iteration we pass in enough data to complete
    # a full D iteration (regardless of number of D steps and accumulations).
    D_batch_size = (config['batch_size'] * config['num_D_steps']
                    * config['num_D_accumulations'])
    loaders = utils.get_data_loaders(**{**config,
                                        'batch_size': D_batch_size,
                                        'start_itr': state_dict['itr']})
    if config['is_encoder']:
        config_aux = config.copy()
        config_aux['augment'] = False
        dataloader_noaug = utils.get_data_loaders(**{**config_aux,
                                                     'batch_size': D_batch_size,
                                                     'start_itr': state_dict['itr']})

    # Prepare inception metrics: FID and IS
    if config['dataset'] in ['C10']:
        get_inception_metrics = inception_utils.prepare_inception_metrics(
            config['dataset'], config['parallel'], config['no_fid'])
    else:
        get_inception_metrics = None

    # Loaders are loaded, prepare the training function
    if config['which_train_fn'] == 'GAN':
        train = train_fns.GAN_training_function(
            G, D, GD, Prior, ema, state_dict, config, losses.Loss_obj(**config),
            None if not config['is_encoder'] else E)
    # Else, assume debugging and use the dummy train fn
    else:
        train = train_fns.dummy_training_function()

    # Prepare Sample function for use with inception metrics
    sample = functools.partial(
        utils.sample,
        G=(G_ema if config['ema'] and config['use_ema'] else G),
        Prior=Prior,
        config=config)

    # Create fixed z and y to track sample evolution over training
    fixed_z, fixed_y = Prior.sample_noise_and_y()
    fixed_z, fixed_y = fixed_z.clone(), fixed_y.clone()
    iter_num = 0

    print('Beginning training at epoch %d...' % state_dict['epoch'])
    # Train for specified number of epochs, although we mostly track G iterations.
    for epoch in range(state_dict['epoch'], config['num_epochs']):
        # Which progressbar to use? TQDM or my own?
        if config['pbar'] == 'mine':
            pbar = utils.progress(loaders[0],
                                  displaytype='s1k' if config['use_multiepoch_sampler'] else 'eta')
        else:
            pbar = tqdm(loaders[0])
        for i, (x, y) in enumerate(pbar):
            # Increment the iteration counter
            state_dict['itr'] += 1
            # Make sure G and D are in training mode, just in case they got set to eval.
            # For D, which typically doesn't have BN, this shouldn't matter much.
            G.train()
            D.train()
            if config['is_encoder']:
                E.train()
            if not config['prior_type'] == 'default':
                Prior.train()
            if config['ema']:
                G_ema.train()
            if config['D_fp16']:
                x, y = x.to(device).half(), y.to(device)
            else:
                x, y = x.to(device), y.to(device)
            metrics = train(x, y, iter_num)
            train_log.log(itr=int(state_dict['itr']), **metrics)

            # Every sv_log_interval, log singular values
            if (config['sv_log_interval'] > 0) and (not (state_dict['itr'] % config['sv_log_interval'])):
                train_log.log(itr=int(state_dict['itr']),
                              **{**utils.get_SVs(G, 'G'), **utils.get_SVs(D, 'D')})
                if config['is_encoder']:
                    train_log.log(itr=int(state_dict['itr']), **{**utils.get_SVs(E, 'E')})

            # If using my progbar, print metrics.
            if config['pbar'] == 'mine':
                print(', '.join(['itr: %d' % state_dict['itr']]
                                + ['%s : %+4.3f' % (key, metrics[key]) for key in metrics]),
                      end=' ')

            # Save weights and copies as configured at specified interval
            if not (state_dict['itr'] % config['save_every']):
                if config['G_eval_mode']:
                    print('Switching G to eval mode...')
                    G.eval()
                    if not config['prior_type'] == 'default':
                        Prior.eval()
                    if config['ema']:
                        G_ema.eval()
                train_fns.save_and_sample(G, D, G_ema, Prior, fixed_z, fixed_y,
                                          state_dict, config, experiment_name,
                                          None if not config['is_encoder'] else E)

            # Encoder-specific evaluation at the test interval
            if not (state_dict['itr'] % config['test_every']) and config['is_encoder']:
                if not config['prior_type'] == 'default':
                    test_acc, test_acc_iter, error_rec = train_fns.test_accuracy(
                        GE, dataloader_noaug, device, config['D_fp16'], config)
                    p_mse, p_lik = train_fns.test_p_acc(GE, device, config)
                if config['n_classes'] == 10:
                    utils.reconstruction_sheet(
                        GE,
                        classes_per_sheet=utils.classes_per_sheet_dict[config['dataset']],
                        num_classes=config['n_classes'],
                        samples_per_class=20,
                        parallel=config['parallel'],
                        samples_root=config['samples_root'],
                        experiment_name=experiment_name,
                        folder_number=state_dict['itr'],
                        dataloader=dataloader_noaug,
                        device=device,
                        D_fp16=config['D_fp16'],
                        config=config)

            # Test every specified interval
            if not (state_dict['itr'] % config['test_every']):
                if config['G_eval_mode']:
                    print('Switching G to eval mode...')
                    if not config['prior_type'] == 'default':
                        Prior.eval()
                    G.eval()
                train_fns.test(G, D, G_ema, Prior, state_dict, config, sample,
                               get_inception_metrics, experiment_name, test_log,
                               None if not config['is_encoder'] else E,
                               None if config['prior_type'] == 'default'
                               else (test_acc, test_acc_iter, error_rec, p_mse, p_lik))
            if not (state_dict['itr'] % config['test_every']):
                utils.create_curves(train_metrics_fname, plot_sv=False,
                                    prior_type=config['prior_type'],
                                    is_E=config['is_encoder'])
                utils.plot_IS_FID(train_metrics_fname)

        # Increment epoch counter at end of epoch
        iter_num += 1
        state_dict['epoch'] += 1
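# utils.ema is not defined in this file. The following is a minimal sketch of
# what a parameter-EMA helper with the interface used above (utils.ema(G, G_ema,
# decay, start_itr), with .update(itr) called from the training function) is
# assumed to do; the repository's real implementation may differ.
import torch


class SimpleEMA(object):
    def __init__(self, source, target, decay=0.9999, start_itr=0):
        self.source = source
        self.target = target
        self.decay = decay
        self.start_itr = start_itr
        # Start the target as an exact copy of the source network.
        self.target.load_state_dict(self.source.state_dict())

    def update(self, itr=None):
        # Before start_itr, just copy the source; afterwards, blend with decay.
        decay = 0.0 if (itr is not None and itr < self.start_itr) else self.decay
        with torch.no_grad():
            for s_p, t_p in zip(self.source.parameters(), self.target.parameters()):
                t_p.copy_(t_p * decay + s_p * (1.0 - decay))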
def run(config):
    config['resolution'] = utils.imsize_dict[config['dataset']]
    config['n_classes'] = utils.nclass_dict[config['dataset']]
    config['G_activation'] = utils.activation_dict[config['G_nl']]
    config['D_activation'] = utils.activation_dict[config['D_nl']]
    if config['resume']:
        config['skip_init'] = True
    config = utils.update_config_roots(config)
    device = 'cuda'
    utils.seed_rng(config['seed'])
    utils.prepare_root(config)
    torch.backends.cudnn.benchmark = True
    model = __import__(config['model'])
    experiment_name = (config['experiment_name'] if config['experiment_name']
                       else utils.name_from_config(config))

    # Build two generator/discriminator pairs: (G, D) is trained here, while
    # (G3, D3) holds weights loaded from a previous task.
    G = model.Generator(**config).to(device)
    D = model.Discriminator(**config).to(device)
    G3 = model.Generator(**config).to(device)
    D3 = model.Discriminator(**config).to(device)
    if config['ema']:
        G_ema = model.Generator(**{**config, 'skip_init': True, 'no_optim': True}).to(device)
        ema = utils.ema(G, G_ema, config['ema_decay'], config['ema_start'])
    else:
        G_ema, ema = None, None
    if config['G_fp16']:
        G = G.half()
        if config['ema']:
            G_ema = G_ema.half()
    if config['D_fp16']:
        D = D.half()
    GD = model.G_D(G, D, config['conditional'])
    GD3 = model.G_D(G3, D3, config['conditional'])

    state_dict = {'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0,
                  'best_IS': 0, 'best_FID': 999999, 'config': config}
    if config['resume']:
        utils.load_weights(G, D, state_dict, config['weights_root'], experiment_name,
                           config['load_weights'] if config['load_weights'] else None,
                           G_ema if config['ema'] else None)
    # Alternative checkpoints that were tried:
    # utils.load_weights(G, D, state_dict, '../Task3_CIFAR_MNIST_KLWGAN_Simulation_Experiment/weights', 'C10Ukl5', 'best0', G_ema if config['ema'] else None)
    # utils.load_weights(G, D, state_dict, '../Task1_CIFAR_MNIST_KLWGAN_Simulation_Experiment/weights', 'C10Ukl5', 'best0', G_ema if config['ema'] else None)
    # utils.load_weights(G3, D3, state_dict, '../Task2_CIFAR_MNIST_KLWGAN_Simulation_Experiment/weights', 'C10Ukl5', 'last0', G_ema if config['ema'] else None)
    # utils.load_weights(G3, D3, state_dict, '../Task2_CIFAR_MNIST_KLWGAN_Simulation_Experiment/weights', 'C10Ukl5', 'best0', G_ema if config['ema'] else None)
    utils.load_weights(G3, D3, state_dict, '../Task2_CIFAR_MNIST_KLWGAN_Simulation_Experiment/weights',
                       'C10Ukl5', 'last0', G_ema if config['ema'] else None)
    utils.load_weights(G, D, state_dict, '../Task3_CIFAR_MNIST_KLWGAN_Simulation_Experiment/weights',
                       'C10Ukl5', 'best0', G_ema if config['ema'] else None)

    if config['parallel']:
        GD = nn.DataParallel(GD)
        if config['cross_replica']:
            patch_replication_callback(GD)

    test_metrics_fname = '%s/%s_log.jsonl' % (config['logs_root'], experiment_name)
    train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name)
    test_log = utils.MetricsLogger(test_metrics_fname, reinitialize=(not config['resume']))
    train_log = utils.MyLogger(train_metrics_fname, reinitialize=(not config['resume']),
                               logstyle=config['logstyle'])
    utils.write_metadata(config['logs_root'], experiment_name, config, state_dict)

    D_batch_size = (config['batch_size'] * config['num_D_steps'] * config['num_D_accumulations'])
    # Use: config['abnormal_class']
    # print(config['abnormal_class'])
    abnormal_class = config['abnormal_class']
    select_dataset = config['select_dataset']
    # print(config['select_dataset'])
    # print(select_dataset)
    loaders = utils.get_data_loaders(**{**config, 'batch_size': D_batch_size,
                                        'start_itr': state_dict['itr'],
                                        'abnormal_class': abnormal_class,
                                        'select_dataset': select_dataset})
    # Usage: --select_dataset cifar10 --abnormal_class 0 --shuffle --batch_size 64 --parallel
    #   --num_G_accumulations 1 --num_D_accumulations 1 --num_epochs 500 --num_D_steps 4
    #   --G_lr 2e-4 --D_lr 2e-4 --dataset C10 --data_root ../Task2_CIFAR_MNIST_KLWGAN_Simulation_Experiment/data/
    #   --G_ortho 0.0 --G_attn 0 --D_attn 0 --G_init N02 --D_init N02 --ema --use_ema --ema_start 1000
    #   --start_eval 50 --test_every 5000 --save_every 2000 --num_best_copies 5 --num_save_copies 2
    #   --loss_type kl_5 --seed 2 --which_best FID --model BigGAN --experiment_name C10Ukl5
    # Usage: --select_dataset mnist --abnormal_class 1 --shuffle --batch_size 64 --parallel
    #   --num_G_accumulations 1 --num_D_accumulations 1 --num_epochs 500 --num_D_steps 4
    #   --G_lr 2e-4 --D_lr 2e-4 --dataset C10 --data_root ../Task2_CIFAR_MNIST_KLWGAN_Simulation_Experiment/data/
    #   --G_ortho 0.0 --G_attn 0 --D_attn 0 --G_init N02 --D_init N02 --ema --use_ema --ema_start 1000
    #   --start_eval 50 --test_every 5000 --save_every 2000 --num_best_copies 5 --num_save_copies 2
    #   --loss_type kl_5 --seed 2 --which_best FID --model BigGAN --experiment_name C10Ukl5

    G_batch_size = max(config['G_batch_size'], config['batch_size'])
    z_, y_ = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'],
                               device=device, fp16=config['G_fp16'])
    fixed_z, fixed_y = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'],
                                         device=device, fp16=config['G_fp16'])
    fixed_z.sample_()
    fixed_y.sample_()
    if not config['conditional']:
        fixed_y.zero_()
        y_.zero_()

    if config['which_train_fn'] == 'GAN':
        train = train_fns.GAN_training_function(G3, D3, GD3, G3, D3, GD3, G, D, GD,
                                                z_, y_, ema, state_dict, config)
    else:
        train = train_fns.dummy_training_function()

    sample = functools.partial(utils.sample,
                               G=(G_ema if config['ema'] and config['use_ema'] else G),
                               z_=z_, y_=y_, config=config)

    if config['dataset'] == 'C10U' or config['dataset'] == 'C10':
        data_moments = 'fid_stats_cifar10_train.npz'
        # data_moments = '../Task1_CIFAR_MNIST_KLWGAN_Simulation_Experiment/fid_stats_cifar10_train.npz'
    else:
        print("Cannot find the dataset.")
        sys.exit()

    for epoch in range(state_dict['epoch'], config['num_epochs']):
        if config['pbar'] == 'mine':
            pbar = utils.progress(loaders[0],
                                  displaytype='s1k' if config['use_multiepoch_sampler'] else 'eta')
        else:
            pbar = tqdm(loaders[0])
        for i, (x, y) in enumerate(pbar):
            state_dict['itr'] += 1
            # Keep the loaded generator in eval mode; only D is updated in this loop.
            G.eval()
            D.train()
            if config['ema']:
                G_ema.train()
            if config['D_fp16']:
                x, y = x.to(device).half(), y.to(device)
            else:
                x, y = x.to(device), y.to(device)
            print('')
            # Random seed
            # print(config['seed'])
            if epoch == 0 and i == 0:
                print(config['seed'])
            metrics = train(x, y)
            # We double the learning rate if we double the batch size.
            train_log.log(itr=int(state_dict['itr']), **metrics)
            if (config['sv_log_interval'] > 0) and (not (state_dict['itr'] % config['sv_log_interval'])):
                train_log.log(itr=int(state_dict['itr']),
                              **{**utils.get_SVs(G, 'G'), **utils.get_SVs(D, 'D')})
            if config['pbar'] == 'mine':
                print(', '.join(['itr: %d' % state_dict['itr']]
                                + ['%s : %+4.3f' % (key, metrics[key]) for key in metrics]),
                      end=' ')
            if not (state_dict['itr'] % config['save_every']):
                if config['G_eval_mode']:
                    G.eval()
                    if config['ema']:
                        G_ema.eval()
                train_fns.save_and_sample(G, D, G_ema, z_, y_, fixed_z, fixed_y,
                                          state_dict, config, experiment_name)
            experiment_name = (config['experiment_name'] if config['experiment_name']
                               else utils.name_from_config(config))
            if (not (state_dict['itr'] % config['test_every'])) and (epoch >= config['start_eval']):
                if config['G_eval_mode']:
                    G.eval()
                    if config['ema']:
                        G_ema.eval()
                utils.sample_inception(
                    G_ema if config['ema'] and config['use_ema'] else G, config, str(epoch))
                folder_number = str(epoch)
                sample_moments = '%s/%s/%s/samples.npz' % (config['samples_root'],
                                                           experiment_name, folder_number)
                FID = fid_score.calculate_fid_given_paths([data_moments, sample_moments],
                                                          batch_size=50, cuda=True, dims=2048)
                train_fns.update_FID(G, D, G_ema, state_dict, config, FID,
                                     experiment_name, test_log)
        state_dict['epoch'] += 1

    # utils.save_weights(G, D, state_dict, config['weights_root'], experiment_name,
    #                    'be01Bes01Best%d' % state_dict['save_best_num'], G_ema if config['ema'] else None)
    utils.save_weights(G, D, state_dict, config['weights_root'], experiment_name,
                       'last%d' % 0, G_ema if config['ema'] else None)
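# utils.prepare_z_y is not shown in this file. A minimal stand-in for the noise
# half of the interface used above (an object supporting in-place .sample_()
# and .zero_()); this is an illustrative assumption, not the repository's
# actual helper.
import torch


class LatentSampler(object):
    def __init__(self, batch_size, dim_z, device='cuda', fp16=False):
        dtype = torch.float16 if fp16 else torch.float32
        self.tensor = torch.zeros(batch_size, dim_z, device=device, dtype=dtype)

    def sample_(self):
        # Draw fresh N(0, 1) noise in place, as z_.sample_() does above.
        self.tensor.normal_()
        return self.tensor

    def zero_(self):
        # Used for the "unconditional" case, as y_.zero_() does above.
        self.tensor.zero_()
        return self.tensor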
def run(config):
    # Update the config dict as necessary.
    # This is for convenience, to add settings derived from the user-specified
    # configuration into the config-dict (e.g. inferring the number of classes
    # and size of the images from the dataset, passing in a pytorch object
    # for the activation specified as a string).
    config['resolution'] = utils.imsize_dict[config['dataset']]
    config['n_classes'] = utils.nclass_dict[config['dataset']]
    config['G_activation'] = utils.activation_dict[config['G_nl']]
    config['D_activation'] = utils.activation_dict[config['D_nl']]
    # By default, skip init if resuming training.
    if config['resume']:
        print('Skipping initialization for training resumption...')
        config['skip_init'] = True
    config = utils.update_config_roots(config)
    device = 'cuda'

    # Seed RNG
    utils.seed_rng(config['seed'])
    # Prepare root folders if necessary
    utils.prepare_root(config)
    # Setup cudnn.benchmark for free speed
    torch.backends.cudnn.benchmark = True

    # Import the model--this line allows us to dynamically select different files.
    model = __import__(config['model'])
    experiment_name = (config['experiment_name'] if config['experiment_name']
                       else utils.name_from_config(config))
    print('Experiment name is %s' % experiment_name)

    # Next, build the model
    G = model.Generator(**config).to(device)
    D = model.Discriminator(**config).to(device)
    E = model.ImgEncoder(**config).to(device)
    # E = model.Encoder(**config).to(device)

    # If using EMA, prepare it
    if config['ema']:
        print('Preparing EMA for G with decay of {}'.format(config['ema_decay']))
        G_ema = model.Generator(**{**config, 'skip_init': True, 'no_optim': True}).to(device)
        ema = utils.ema(G, G_ema, config['ema_decay'], config['ema_start'])
    else:
        G_ema, ema = None, None

    # FP16?
    if config['G_fp16']:
        print('Casting G to float16...')
        G = G.half()
        if config['ema']:
            G_ema = G_ema.half()
    if config['D_fp16']:
        print('Casting D to fp16...')
        D = D.half()
        # Consider automatically reducing SN_eps?
    GDE = model.G_D_E(G, D, E)

    print('Number of params in G: {} D: {} E: {}'.format(
        *[sum([p.data.nelement() for p in net.parameters()]) for net in [G, D, E]]))

    # Prepare state dict, which holds things like epoch # and itr #
    state_dict = {'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0,
                  'best_IS': 0, 'best_FID': 999999, 'config': config}

    # If loading from a pre-trained model, load weights
    if config['resume']:
        print('Loading weights...')
        utils.load_weights(G, D, E, state_dict, config['weights_root'], experiment_name,
                           config['load_weights'] if config['load_weights'] else None,
                           G_ema if config['ema'] else None)

    # If parallel, parallelize the GDE module
    if config['parallel']:
        GDE = nn.DataParallel(GDE)
        if config['cross_replica']:
            patch_replication_callback(GDE)

    # Prepare loggers for stats; metrics holds test metrics,
    # lmetrics holds any desired training metrics.
    test_metrics_fname = '%s/%s_log.jsonl' % (config['logs_root'], experiment_name)
    train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name)
    print('Inception Metrics will be saved to {}'.format(test_metrics_fname))
    test_log = utils.MetricsLogger(test_metrics_fname, reinitialize=(not config['resume']))
    print('Training Metrics will be saved to {}'.format(train_metrics_fname))
    train_log = utils.MyLogger(train_metrics_fname, reinitialize=(not config['resume']),
                               logstyle=config['logstyle'])
    # Write metadata
    utils.write_metadata(config['logs_root'], experiment_name, config, state_dict)

    # Prepare data; the Discriminator's batch size is all that needs to be passed
    # to the dataloader, as G doesn't require dataloading.
    # Note that at every loader iteration we pass in enough data to complete
    # a full D iteration (regardless of number of D steps and accumulations).
    D_batch_size = (config['batch_size'] * config['num_D_steps'] * config['num_D_accumulations'])
    loaders, train_dataset = utils.get_data_loaders(**{**config,
                                                       'batch_size': D_batch_size,
                                                       'start_itr': state_dict['itr']})

    # # Prepare inception metrics: FID and IS
    # get_inception_metrics = inception_utils.prepare_inception_metrics(
    #     config['dataset'], config['parallel'], config['no_fid'])

    # Prepare noise and randomly sampled label arrays
    # Allow for different batch sizes in G
    G_batch_size = max(config['G_batch_size'], config['batch_size'])
    z_, y_ = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'],
                               device=device, fp16=config['G_fp16'])
    # Prepare a fixed z & y to see individual sample evolution throughout training
    fixed_z, fixed_y = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'],
                                         device=device, fp16=config['G_fp16'])
    fixed_z.sample_()
    fixed_y.sample_()
    print("fixed_y original: {} {}".format(fixed_y.shape, fixed_y[:10]))
    ## TODO: change the sample method to sample x and y
    fixed_x, fixed_y_of_x = utils.prepare_x_y(G_batch_size, train_dataset, experiment_name, config)

    # Build image pool to prevent mode collapse
    if config['img_pool_size'] != 0:
        img_pool = ImagePool(config['img_pool_size'], train_dataset.num_class,
                             save_dir=os.path.join(config['imgbuffer_root'], experiment_name),
                             resume_buffer=config['resume_buffer'])
    else:
        img_pool = None

    # Loaders are loaded, prepare the training function
    if config['which_train_fn'] == 'GAN':
        train = train_fns.GAN_training_function(G, D, E, GDE, ema, state_dict, config, img_pool)
    # Else, assume debugging and use the dummy train fn
    else:
        train = train_fns.dummy_training_function()

    # Prepare Sample function for use with inception metrics
    sample = functools.partial(utils.sample,
                               G=(G_ema if config['ema'] and config['use_ema'] else G),
                               z_=z_, y_=y_, config=config)

    # print('Beginning training at epoch %f...' % (state_dict['itr'] * D_batch_size / len(train_dataset)))
    print("Beginning training at Epoch {} (iteration {})".format(state_dict['epoch'], state_dict['itr']))
    # # Train for specified number of epochs, although we mostly track G iterations.
    # for epoch in range(state_dict['epoch'], config['num_epochs']):

    # Which progressbar to use? TQDM or my own?
    if config['pbar'] == 'mine':
        pbar = utils.progress(loaders[0],
                              displaytype='s1k' if config['use_multiepoch_sampler'] else 'eta')
    else:
        pbar = tqdm(loaders[0])
    for i, (x, y) in enumerate(pbar):
        # Increment the iteration counter
        state_dict['itr'] += 1
        # Make sure G and D are in training mode, just in case they got set to eval.
        # For D, which typically doesn't have BN, this shouldn't matter much.
        G.eval()
        D.eval()
        if config['ema']:
            G_ema.eval()
        if config['D_fp16']:
            x, y = x.to(device).half(), y.to(device)
        else:
            x, y = x.to(device), y.to(device)
        metrics = train(x, y)  # assumed training step: `metrics` is consumed below but its producer was missing here

        # Every sv_log_interval, log singular values
        if (config['sv_log_interval'] > 0) and (not (state_dict['itr'] % config['sv_log_interval'])):
            train_log.log(itr=int(state_dict['itr']),
                          **{**utils.get_SVs(G, 'G'), **utils.get_SVs(D, 'D')})

        # If using my progbar, print metrics.
        if config['pbar'] == 'mine':
            print(', '.join(['itr: %d' % state_dict['itr']]
                            + ['%s : %+4.3f' % (key, metrics[key]) for key in metrics]),
                  end=' ')

        # Save weights and copies as configured at specified interval
        if (not state_dict['itr'] % config['save_img_every']) or (not state_dict['itr'] % config['save_model_every']):
            if config['G_eval_mode']:
                print('Switching G to eval mode...')
                G.eval()
                if config['ema']:
                    G_ema.eval()
            save_weights = config['save_weights']
            if state_dict['itr'] % config['save_model_every']:
                save_weights = False
            train_fns.save_and_sample(G, D, E, G_ema, fixed_x, fixed_y_of_x, z_, y_,
                                      state_dict, config, experiment_name, img_pool,
                                      save_weights=save_weights)

        # # Test every specified interval
        # if not (state_dict['itr'] % config['test_every']):
        #     if config['G_eval_mode']:
        #         print('Switching G to eval mode...')
        #         G.eval()
        #     train_fns.test(G, D, G_ema, z_, y_, state_dict, config, sample,
        #                    get_inception_metrics, experiment_name, test_log)

    # Increment epoch counter at end of epoch (fractional epoch derived from iterations)
    state_dict['epoch'] = state_dict['itr'] * D_batch_size / (len(train_dataset))
    print("Finished Epoch {} (iteration {})".format(state_dict['epoch'], state_dict['itr']))
def run(config):
    config['resolution'] = imsize_dict[config['dataset']]
    config['n_classes'] = nclass_dict[config['dataset']]
    config['G_activation'] = activation_dict[config['G_nl']]
    config['D_activation'] = activation_dict[config['D_nl']]
    if config['resume']:
        config['skip_init'] = True
    config = update_config_roots(config)
    device = 'cuda'
    utils_Task1_KLWGAN_Simulation_Experiment.seed_rng(config['seed'])
    utils_Task1_KLWGAN_Simulation_Experiment.prepare_root(config)
    torch.backends.cudnn.benchmark = True
    model = __import__(config['model'])
    experiment_name = (config['experiment_name'] if config['experiment_name']
                       else utils_Task1_KLWGAN_Simulation_Experiment.name_from_config(config))
    G = model.Generator(**config).to(device)
    D = model.Discriminator(**config).to(device)
    if config['ema']:
        G_ema = model.Generator(**{**config, 'skip_init': True, 'no_optim': True}).to(device)
        ema = utils_Task1_KLWGAN_Simulation_Experiment.ema(G, G_ema, config['ema_decay'],
                                                           config['ema_start'])
    else:
        G_ema, ema = None, None
    if config['G_fp16']:
        G = G.half()
        if config['ema']:
            G_ema = G_ema.half()
    if config['D_fp16']:
        D = D.half()
    GD = model.G_D(G, D, config['conditional'])

    state_dict = {'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0,
                  'best_IS': 0, 'best_FID': 999999, 'config': config}
    if config['resume']:
        utils_Task1_KLWGAN_Simulation_Experiment.load_weights(
            G, D, state_dict, config['weights_root'], experiment_name,
            config['load_weights'] if config['load_weights'] else None,
            G_ema if config['ema'] else None)
    if config['parallel']:
        GD = nn.DataParallel(GD)
        if config['cross_replica']:
            patch_replication_callback(GD)

    test_metrics_fname = '%s/%s_log.jsonl' % (config['logs_root'], experiment_name)
    train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name)
    test_log = utils_Task1_KLWGAN_Simulation_Experiment.MetricsLogger(
        test_metrics_fname, reinitialize=(not config['resume']))
    train_log = utils_Task1_KLWGAN_Simulation_Experiment.MyLogger(
        train_metrics_fname, reinitialize=(not config['resume']), logstyle=config['logstyle'])
    utils_Task1_KLWGAN_Simulation_Experiment.write_metadata(
        config['logs_root'], experiment_name, config, state_dict)

    D_batch_size = (config['batch_size'] * config['num_D_steps'] * config['num_D_accumulations'])
    # Use: config['abnormal_class']
    # print(config['abnormal_class'])
    abnormal_class = config['abnormal_class']
    select_dataset = config['select_dataset']
    # print(config['select_dataset'])
    # print(select_dataset)
    loaders = utils_Task1_KLWGAN_Simulation_Experiment.get_data_loaders(
        **{**config, 'batch_size': D_batch_size, 'start_itr': state_dict['itr'],
           'abnormal_class': abnormal_class, 'select_dataset': select_dataset})

    G_batch_size = max(config['G_batch_size'], config['batch_size'])
    z_, y_ = utils_Task1_KLWGAN_Simulation_Experiment.prepare_z_y(
        G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16'])
    fixed_z, fixed_y = utils_Task1_KLWGAN_Simulation_Experiment.prepare_z_y(
        G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16'])
    fixed_z.sample_()
    fixed_y.sample_()
    if not config['conditional']:
        fixed_y.zero_()
        y_.zero_()

    if config['which_train_fn'] == 'GAN':
        train = train_fns.GAN_training_function(G, D, GD, z_, y_, ema, state_dict, config)
    else:
        train = train_fns.dummy_training_function()

    sample = functools.partial(utils_Task1_KLWGAN_Simulation_Experiment.sample,
                               G=(G_ema if config['ema'] and config['use_ema'] else G),
                               z_=z_, y_=y_, config=config)

    if config['dataset'] == 'C10U' or config['dataset'] == 'C10':
        data_moments = 'fid_stats_cifar10_train.npz'
        # data_moments = '../Task1_CIFAR_MNIST_KLWGAN_Simulation_Experiment/fid_stats_cifar10_train.npz'
    else:
        print("Cannot find the dataset.")
        sys.exit()

    for epoch in range(state_dict['epoch'], config['num_epochs']):
        if config['pbar'] == 'mine':
            pbar = utils_Task1_KLWGAN_Simulation_Experiment.progress(
                loaders[0], displaytype='s1k' if config['use_multiepoch_sampler'] else 'eta')
        else:
            pbar = tqdm(loaders[0])
        for i, (x, y) in enumerate(pbar):
            state_dict['itr'] += 1
            G.train()
            D.train()
            if config['ema']:
                G_ema.train()
            if config['D_fp16']:
                x, y = x.to(device).half(), y.to(device)
            else:
                x, y = x.to(device), y.to(device)
            # metrics = train(x, y)
            print('')
            # Random seed
            # print(config['seed'])
            if epoch == 0 and i == 0:
                print(config['seed'])
            metrics = train(x, y)
            # We double the learning rate if we double the batch size.
            train_log.log(itr=int(state_dict['itr']), **metrics)
            if (config['sv_log_interval'] > 0) and (not (state_dict['itr'] % config['sv_log_interval'])):
                train_log.log(itr=int(state_dict['itr']),
                              **{**utils_Task1_KLWGAN_Simulation_Experiment.get_SVs(G, 'G'),
                                 **utils_Task1_KLWGAN_Simulation_Experiment.get_SVs(D, 'D')})
            if config['pbar'] == 'mine':
                print(', '.join(['itr: %d' % state_dict['itr']]
                                + ['%s : %+4.3f' % (key, metrics[key]) for key in metrics]),
                      end=' ')
            if not (state_dict['itr'] % config['save_every']):
                if config['G_eval_mode']:
                    G.eval()
                    if config['ema']:
                        G_ema.eval()
                train_fns.save_and_sample(G, D, G_ema, z_, y_, fixed_z, fixed_y,
                                          state_dict, config, experiment_name)
            experiment_name = (config['experiment_name'] if config['experiment_name']
                               else utils_Task1_KLWGAN_Simulation_Experiment.name_from_config(config))
            if (not (state_dict['itr'] % config['test_every'])) and (epoch >= config['start_eval']):
                if config['G_eval_mode']:
                    G.eval()
                    if config['ema']:
                        G_ema.eval()
                utils_Task1_KLWGAN_Simulation_Experiment.sample_inception(
                    G_ema if config['ema'] and config['use_ema'] else G, config, str(epoch))
                folder_number = str(epoch)
                sample_moments = '%s/%s/%s/samples.npz' % (config['samples_root'],
                                                           experiment_name, folder_number)
                # Use update_FID() from train_fns.py and save_weights() from
                # utils_Task1_KLWGAN_Simulation_Experiment.py to keep the lowest FID score.
                FID = fid_score.calculate_fid_given_paths([data_moments, sample_moments],
                                                          batch_size=50, cuda=True, dims=2048)
                train_fns.update_FID(G, D, G_ema, state_dict, config, FID,
                                     experiment_name, test_log)
                # FID also from: https://github.com/DarthSid95/RumiGANs/blob/main/gan_metrics.py
                # Implicit generative models and GANs generate sharp, low-FID, realistic,
                # high-quality images; here they are used for the challenging task of
                # anomaly detection in high-dimensional spaces.
        state_dict['epoch'] += 1

    # Save the last model
    utils_Task1_KLWGAN_Simulation_Experiment.save_weights(
        G, D, state_dict, config['weights_root'], experiment_name, 'last%d' % 0,
        G_ema if config['ema'] else None)
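# train_fns.update_FID is defined elsewhere. A hedged sketch of the
# "keep the checkpoint with the lowest FID" logic it is assumed to implement,
# reusing the save_weights signature seen above; the real function may differ.
def update_FID_sketch(G, D, G_ema, state_dict, config, FID, experiment_name, test_log):
    # Record the metric, then save a 'best' copy whenever FID improves.
    test_log.log(itr=int(state_dict['itr']), FID=float(FID))
    if FID < state_dict['best_FID']:
        state_dict['best_FID'] = FID
        utils_Task1_KLWGAN_Simulation_Experiment.save_weights(
            G, D, state_dict, config['weights_root'], experiment_name,
            'best%d' % state_dict['save_best_num'],
            G_ema if config['ema'] else None)
        state_dict['save_best_num'] = (state_dict['save_best_num'] + 1) % config['num_best_copies']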
def run(config):
    # Update the config dict as necessary.
    # This is for convenience, to add settings derived from the user-specified
    # configuration into the config-dict (e.g. inferring the number of classes
    # and size of the images from the dataset, passing in a pytorch object
    # for the activation specified as a string).
    config['resolution'] = utils.imsize_dict[config['dataset']]
    # config['n_classes'] = utils.nclass_dict[config['dataset']]
    # NOTE: setting n_classes to 1 except in the conditional case, to train as an unconditional model
    config['n_classes'] = 1
    if config['conditional']:
        config['n_classes'] = 2
    print('n classes: {}'.format(config['n_classes']))
    config['G_activation'] = utils.activation_dict[config['G_nl']]
    config['D_activation'] = utils.activation_dict[config['D_nl']]
    # By default, skip init if resuming training.
    if config['resume']:
        print('Skipping initialization for training resumption...')
        config['skip_init'] = True
    config = utils.update_config_roots(config)
    device = 'cuda'

    # Seed RNG
    utils.seed_rng(config['seed'])
    # Prepare root folders if necessary
    utils.prepare_root(config)
    # Setup cudnn.benchmark for free speed
    torch.backends.cudnn.benchmark = True

    # Import the model--this line allows us to dynamically select different files.
    model = __import__(config['model'])
    experiment_name = (config['experiment_name'] if config['experiment_name']
                       else utils.name_from_config(config))
    print('Experiment name is %s' % experiment_name)

    # Next, build the model
    G = model.Generator(**config).to(device)
    D = model.Discriminator(**config).to(device)

    # If using EMA, prepare it
    if config['ema']:
        print('Preparing EMA for G with decay of {}'.format(config['ema_decay']))
        G_ema = model.Generator(**{**config, 'skip_init': True, 'no_optim': True}).to(device)
        ema = utils.ema(G, G_ema, config['ema_decay'], config['ema_start'])
    else:
        G_ema, ema = None, None

    # FP16?
    if config['G_fp16']:
        print('Casting G to float16...')
        G = G.half()
        if config['ema']:
            G_ema = G_ema.half()
    if config['D_fp16']:
        print('Casting D to fp16...')
        D = D.half()
        # Consider automatically reducing SN_eps?
    GD = model.G_D(G, D, config['conditional'])  # check if labels are 0's if "unconditional"
    print(G)
    print(D)
    print('Number of params in G: {} D: {}'.format(
        *[sum([p.data.nelement() for p in net.parameters()]) for net in [G, D]]))

    # Prepare state dict, which holds things like epoch # and itr #
    state_dict = {'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num_fair': 0,
                  'save_best_num_fid': 0, 'best_IS': 0, 'best_FID': 999999,
                  'best_fair_d': 999999, 'config': config}

    # If loading from a pre-trained model, load weights
    if config['resume']:
        print('Loading weights...')
        utils.load_weights(G, D, state_dict, config['weights_root'], experiment_name,
                           config['load_weights'] if config['load_weights'] else None,
                           G_ema if config['ema'] else None)

    # If parallel, parallelize the GD module
    if config['parallel']:
        GD = nn.DataParallel(GD)
        if config['cross_replica']:
            patch_replication_callback(GD)

    # Prepare loggers for stats; metrics holds test metrics,
    # lmetrics holds any desired training metrics.
    test_metrics_fname = '%s/%s_log.json' % (config['logs_root'], experiment_name)
    train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name)
    print('Inception Metrics will be saved to {}'.format(test_metrics_fname))
    test_log = utils.MetricsLogger(test_metrics_fname, reinitialize=(not config['resume']))
    print('Training Metrics will be saved to {}'.format(train_metrics_fname))
    train_log = utils.MyLogger(train_metrics_fname, reinitialize=(not config['resume']),
                               logstyle=config['logstyle'])
    # Write metadata
    utils.write_metadata(config['logs_root'], experiment_name, config, state_dict)

    # Prepare data; the Discriminator's batch size is all that needs to be passed
    # to the dataloader, as G doesn't require dataloading.
    # Note that at every loader iteration we pass in enough data to complete
    # a full D iteration (regardless of number of D steps and accumulations).
    D_batch_size = (config['batch_size'] * config['num_D_steps'] * config['num_D_accumulations'])
    loaders = utils.get_data_loaders(config, **{**config,
                                                'batch_size': D_batch_size,
                                                'start_itr': state_dict['itr']})

    # Prepare inception metrics: FID and IS
    get_inception_metrics = inception_utils.prepare_inception_metrics(
        config['dataset'], config['parallel'], config['no_fid'])

    # Prepare noise and randomly sampled label arrays
    # Allow for different batch sizes in G
    G_batch_size = max(config['G_batch_size'], config['batch_size'])
    z_, y_ = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'],
                               device=device, fp16=config['G_fp16'],
                               true_prop=config['true_prop'])
    # Prepare a fixed z & y to see individual sample evolution throughout training
    fixed_z, fixed_y = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'],
                                         device=device, fp16=config['G_fp16'])
    fixed_z.sample_()
    fixed_y.sample_()

    # NOTE: "unconditional" GAN
    if not config['conditional']:
        fixed_y.zero_()
        y_.zero_()

    # Loaders are loaded, prepare the training function
    if config['which_train_fn'] == 'GAN':
        train = train_fns.GAN_training_function(G, D, GD, z_, y_, ema, state_dict, config)
    # Else, assume debugging and use the dummy train fn
    else:
        train = train_fns.dummy_training_function()

    # Prepare Sample function for use with inception metrics
    sample = functools.partial(utils.sample,
                               G=(G_ema if config['ema'] and config['use_ema'] else G),
                               z_=z_, y_=y_, config=config)

    print('Beginning training at epoch %d...' % state_dict['epoch'])
    # Train for specified number of epochs, although we mostly track G iterations.
    for epoch in range(state_dict['epoch'], config['num_epochs']):
        # Which progressbar to use? TQDM or my own?
        if config['pbar'] == 'mine':
            pbar = utils.progress(loaders[0],
                                  displaytype='s1k' if config['use_multiepoch_sampler'] else 'eta')
        else:
            pbar = tqdm(loaders[0])
        # Iterate through the dataloader; each batch also carries a per-sample ratio.
        for i, (x, y, ratio) in enumerate(pbar):
            # Increment the iteration counter
            state_dict['itr'] += 1
            # Make sure G and D are in training mode, just in case they got set to eval.
            # For D, which typically doesn't have BN, this shouldn't matter much.
            G.train()
            D.train()
            if config['ema']:
                G_ema.train()
            if config['D_fp16']:
                x, y, ratio = x.to(device).half(), y.to(device), ratio.to(device)
            else:
                x, y, ratio = x.to(device), y.to(device), ratio.to(device)
            metrics = train(x, y, ratio)
            train_log.log(itr=int(state_dict['itr']), **metrics)

            # Every sv_log_interval, log singular values
            if (config['sv_log_interval'] > 0) and (not (state_dict['itr'] % config['sv_log_interval'])):
                train_log.log(itr=int(state_dict['itr']),
                              **{**utils.get_SVs(G, 'G'), **utils.get_SVs(D, 'D')})

            # If using my progbar, print metrics.
            if config['pbar'] == 'mine':
                print(', '.join(['itr: %d' % state_dict['itr']]
                                + ['%s : %+4.3f' % (key, metrics[key]) for key in metrics]),
                      end=' ')

            # Save weights and copies as configured at specified interval
            if not (state_dict['itr'] % config['save_every']):
                if config['G_eval_mode']:
                    print('Switching G to eval mode...')
                    G.eval()
                    if config['ema']:
                        G_ema.eval()
                train_fns.save_and_sample(G, D, G_ema, z_, y_, fixed_z, fixed_y,
                                          state_dict, config, experiment_name)

        # Test every epoch (not at a specified iteration interval)
        if epoch >= config['start_eval']:
            # First, find the correct inception moments
            data_moments = '../../fid_stats/unbiased_all_gender_fid_stats.npz'
            if config['multi']:
                data_moments = '../../fid_stats/unbiased_all_multi_fid_stats.npz'
                fid_type = 'multi'
            else:
                fid_type = 'gender'
            # Load appropriate moments
            print('Loaded data moments at: {}'.format(data_moments))
            experiment_name = (config['experiment_name'] if config['experiment_name']
                               else utils.name_from_config(config))

            # Eval mode for FID computation
            if config['G_eval_mode']:
                print('Switching G to eval mode...')
                G.eval()
                if config['ema']:
                    G_ema.eval()
            utils.sample_inception(
                G_ema if config['ema'] and config['use_ema'] else G, config, str(epoch))
            # Get saved sample path
            folder_number = str(epoch)
            sample_moments = '%s/%s/%s/samples.npz' % (config['samples_root'],
                                                       experiment_name, folder_number)
            # Calculate FID
            FID = fid_score.calculate_fid_given_paths([data_moments, sample_moments],
                                                      batch_size=100, cuda=True, dims=2048)
            print("FID calculated")
            train_fns.update_FID(G, D, G_ema, state_dict, config, FID,
                                 experiment_name, test_log, epoch)  # added epoch logging

        # Increment epoch counter at end of epoch
        print('Completed epoch {}'.format(epoch))
        state_dict['epoch'] += 1
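# The weighted training function (train_fns.GAN_training_function) is defined
# elsewhere; purely to illustrate how the per-sample `ratio` passed to
# train(x, y, ratio) could enter a discriminator objective, here is a sketch of
# an importance-weighted hinge loss. This is an assumption for illustration,
# not the repository's actual loss.
import torch.nn.functional as F


def weighted_discriminator_hinge_loss(d_real, d_fake, ratio):
    # Weight the real-sample hinge term by the per-sample ratio, then average.
    loss_real = (ratio * F.relu(1.0 - d_real)).mean()
    loss_fake = F.relu(1.0 + d_fake).mean()
    return loss_real + loss_fake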
def run(config):
    # Update the config dict as necessary.
    # This is for convenience, to add settings derived from the user-specified
    # configuration into the config-dict (e.g. inferring the number of classes
    # and size of the images from the dataset, passing in a pytorch object
    # for the activation specified as a string).
    config['resolution'] = utils.imsize_dict[config['dataset']]
    config['n_classes'] = utils.nclass_dict[config['dataset']]
    config['G_activation'] = utils.activation_dict[config['G_nl']]
    config['D_activation'] = utils.activation_dict[config['D_nl']]
    # By default, skip init if resuming training.
    if config['resume']:
        print('Skipping initialization for training resumption...')
        config['skip_init'] = True
    config = vae_utils.update_config_roots(config)
    device = 'cuda'

    # Seed RNG
    utils.seed_rng(config['seed'])
    # Prepare root folders if necessary
    utils.prepare_root(config)
    # Setup cudnn.benchmark for free speed
    torch.backends.cudnn.benchmark = True

    # Import the model--this line allows us to dynamically select different files.
    experiment_name = (config['experiment_name'] if config['experiment_name']
                       else utils.name_from_config(config))
    print('Experiment name is %s' % experiment_name)

    # Next, build the model
    E = Ex.Extractor(**config).to(device)

    # If using EMA, prepare it
    if config['ema']:
        print('Preparing EMA for E with decay of {}'.format(config['ema_decay']))
        E_ema = Ex.Extractor(**{**config, 'skip_init': True, 'no_optim': True}).to(device)
        ema = utils.ema(E, E_ema, config['ema_decay'], config['ema_start'])
    else:
        E_ema, ema = None, None

    print(E)
    print('Number of params in E: {}'.format(
        sum([p.data.nelement() for p in E.parameters()])))

    # Prepare state dict, which holds things like epoch # and itr #
    state_dict = {'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0,
                  'best_IS': 0, 'best_FID': 999999, 'config': config}

    # If loading from a pre-trained model, load weights
    if config['resume']:
        print('Loading weights...')
        vae_utils.load_weights([E], state_dict, config['weights_root'], experiment_name,
                               config['load_weights'] if config['load_weights'] else None,
                               [E_ema] if config['ema'] else None)

    # If parallel, parallelize the extractor
    if config['parallel']:
        E_parallel = nn.DataParallel(E)
        if config['cross_replica']:
            patch_replication_callback(E_parallel)

    # Prepare loggers for stats; metrics holds test metrics,
    # lmetrics holds any desired training metrics.
    test_metrics_fname = '%s/%s_log.jsonl' % (config['logs_root'], experiment_name)
    train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name)
    print('Inception Metrics will be saved to {}'.format(test_metrics_fname))
    test_log = utils.MetricsLogger(test_metrics_fname, reinitialize=(not config['resume']))
    print('Training Metrics will be saved to {}'.format(train_metrics_fname))
    train_log = utils.MyLogger(train_metrics_fname, reinitialize=(not config['resume']),
                               logstyle=config['logstyle'])
    # Write metadata
    utils.write_metadata(config['logs_root'], experiment_name, config, state_dict)

    # Prepare data; the Discriminator's batch size is all that needs to be passed
    # to the dataloader, as G doesn't require dataloading.
    # Note that at every loader iteration we pass in enough data to complete
    # a full D iteration (regardless of number of D steps and accumulations).
    D_batch_size = (config['batch_size'] * 8)
    loaders = mini_datasets.get_data_loaders(**{**config,
                                                'batch_size': D_batch_size,
                                                'start_itr': state_dict['itr']})

    # Prepare noise and randomly sampled label arrays
    # Allow for different batch sizes in G
    G_batch_size = max(config['G_batch_size'], config['batch_size'])

    # Loaders are loaded, prepare the training function
    if config['which_train_fn'] == 'GAN':
        train = Ex.Extractor_training_function(E, ema, E_parallel, state_dict, config)
    # Else, assume debugging and use the dummy train fn
    else:
        train = train_fns.dummy_training_function()

    print('Beginning training at epoch %d...' % state_dict['epoch'])
    # Train for specified number of epochs, although we mostly track G iterations.
    for epoch in range(state_dict['epoch'], config['num_epochs']):
        # Which progressbar to use? TQDM or my own?
        if config['pbar'] == 'mine':
            pbar = utils.progress(zip(loaders[0], loaders[1]),
                                  displaytype='s1k' if config['use_multiepoch_sampler'] else 'eta')
        else:
            pbar = tqdm(zip(loaders[0], loaders[1]))
        # Each step draws one labelled batch (lx, ly) and one unlabelled batch (ux, uy).
        for i, ((lx, ly), (ux, uy)) in enumerate(pbar):
            x = torch.cat([lx, ux], 0)
            y = torch.cat([ly, uy])
            # Increment the iteration counter
            state_dict['itr'] += 1
            # Make sure E is in training mode, just in case it got set to eval.
            E.train()
            if config['ema']:
                E_ema.train()
            if config['D_fp16']:
                x, y = x.to(device).half(), y.to(device)
            else:
                x, y = x.to(device), y.to(device)
            metrics = train(x, y)
            train_log.log(itr=int(state_dict['itr']), **metrics)

            # Every sv_log_interval, log singular values
            # (NOTE: G, D and the sampling objects below are not defined in this extractor
            # script; these blocks are carried over from the GAN training template.)
            if (config['sv_log_interval'] > 0) and (not (state_dict['itr'] % config['sv_log_interval'])):
                train_log.log(itr=int(state_dict['itr']),
                              **{**utils.get_SVs(G, 'G'), **utils.get_SVs(D, 'D')})

            # If using my progbar, print metrics.
            if config['pbar'] == 'mine':
                print(', '.join(['itr: %d' % state_dict['itr']]
                                + ['%s : %+4.3f' % (key, metrics[key]) for key in metrics]),
                      end=' ')

            # Save weights and copies as configured at specified interval
            if not (state_dict['itr'] % config['save_every']):
                if config['G_eval_mode']:
                    print('Switching G to eval mode...')
                    G.eval()
                    if config['ema']:
                        G_ema.eval()
                train_fns.save_and_sample(G, D, G_ema, z_, y_, fixed_z, fixed_y,
                                          state_dict, config, experiment_name)

            # Test every specified interval
            if not (state_dict['itr'] % config['test_every']):
                if config['G_eval_mode']:
                    print('Switching G to eval mode...')
                    G.eval()
                train_fns.test(G, D, G_ema, z_, y_, state_dict, config, sample,
                               get_inception_metrics, experiment_name, test_log)

        # Increment epoch counter at end of epoch
        state_dict['epoch'] += 1
def run(config):
    # Update the config dict as necessary.
    # This is for convenience, to add settings derived from the user-specified
    # configuration into the config-dict (e.g. inferring the number of classes
    # and size of the images from the dataset, passing in a pytorch object
    # for the activation specified as a string).
    config['resolution'] = utils.imsize_dict[config['dataset']]
    config['n_classes'] = utils.nclass_dict[config['dataset']]
    config['G_activation'] = utils.activation_dict[config['G_nl']]
    config['D_activation'] = utils.activation_dict[config['D_nl']]
    # By default, skip init if resuming training.
    if config['resume']:
        print('Skipping initialization for training resumption...')
        config['skip_init'] = True
    config = vae_utils.update_config_roots(config)
    device = 'cuda'

    # Seed RNG
    utils.seed_rng(config['seed'])
    # Prepare root folders if necessary
    utils.prepare_root(config)
    # Setup cudnn.benchmark for free speed
    torch.backends.cudnn.benchmark = True

    experiment_name = (config['experiment_name'] if config['experiment_name']
                       else utils.name_from_config(config))
    print('Experiment name is %s' % experiment_name)

    # Next, build the model: a frozen pretrained G and a trainable encoder E.
    G = Generator(**{**config, 'skip_init': True, 'no_optim': True}).to(device)
    print('Loading pretrained G for dir %s ...' % config['pretrained_G_dir'])
    pretrained_dict = torch.load(config['pretrained_G_dir'])
    G_dict = G.state_dict()
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in G_dict}
    G_dict.update(pretrained_dict)
    G.load_state_dict(G_dict)

    E = Encoder(**config).to(device)
    utils.toggle_grad(G, False)
    utils.toggle_grad(E, True)

    class G_E(nn.Module):
        def __init__(self):
            super(G_E, self).__init__()
            self.G = G
            self.E = E

        def forward(self, w, y):
            # Generate an image from (w, y) without tracking gradients through G,
            # then encode it back into the latent space.
            with torch.no_grad():
                net = self.G(w, self.G.shared(y))
            net = self.E(net)
            return net

    GE = G_E()

    # If using EMA, prepare it
    if config['ema']:
        print('Preparing EMA for E with decay of {}'.format(config['ema_decay']))
        E_ema = Encoder(**{**config, 'skip_init': True, 'no_optim': True}).to(device)
        e_ema = utils.ema(E, E_ema, config['ema_decay'], config['ema_start'])
    else:
        E_ema, e_ema = None, None

    print(G)
    print(E)
    print('Number of params in G: {} E: {}'.format(
        *[sum([p.data.nelement() for p in net.parameters()]) for net in [G, E]]))

    # Prepare state dict, which holds things like epoch # and itr #
    state_dict = {'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0,
                  'best_IS': 0, 'best_FID': 999999, 'config': config}

    # If loading from a pre-trained model, load weights
    if config['resume']:
        print('Loading weights...')
        vae_utils.load_weights([E], state_dict, config['weights_root'], experiment_name,
                               config['load_weights'] if config['load_weights'] else None,
                               [e_ema] if config['ema'] else None)

    # If parallel, parallelize the GE module
    if config['parallel']:
        GE = nn.DataParallel(GE)
        if config['cross_replica']:
            patch_replication_callback(GE)

    # Prepare loggers for stats; metrics holds test metrics,
    # lmetrics holds any desired training metrics.
    train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name)
    print('Training Metrics will be saved to {}'.format(train_metrics_fname))
    train_log = utils.MyLogger(train_metrics_fname, reinitialize=(not config['resume']),
                               logstyle=config['logstyle'])
    # Write metadata
    utils.write_metadata(config['logs_root'], experiment_name, config, state_dict)

    G_batch_size = max(config['G_batch_size'], config['batch_size'])
    z_, y_ = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'],
                               device=device, fp16=config['G_fp16'])

    def train():
        # One latent-recovery step: sample (z, y), run G then E, and regress the
        # recovered latent onto the original with an L1 loss.
        E.optim.zero_grad()
        z_.sample_()
        y_.sample_()
        net = GE(z_[:config['batch_size']], y_[:config['batch_size']])
        loss = F.l1_loss(z_[:config['batch_size']], net)
        loss.backward()
        if config["E_ortho"] > 0.0:
            # Debug print to indicate we're using ortho reg in E.
            print('using modified ortho reg in E')
            utils.ortho(E, config['E_ortho'])
        E.optim.step()
        out = {'loss': float(loss.item())}
        return out

    print('Beginning training at epoch %d...' % state_dict['epoch'])
    # Train for specified number of epochs, although we mostly track G iterations.
    for epoch in range(state_dict['epoch'], config['num_epochs']):
        for i in range(100000):
            # Increment the iteration counter
            state_dict['itr'] += 1
            # Make sure G and E are in training mode, just in case they got set to eval.
            G.train()
            E.train()
            if config['ema']:
                E_ema.train()
            metrics = train()
            train_log.log(itr=int(state_dict['itr']), **metrics)

            # Every sv_log_interval, log singular values
            if (config['sv_log_interval'] > 0) and (not (state_dict['itr'] % config['sv_log_interval'])):
                train_log.log(itr=int(state_dict['itr']),
                              **{**utils.get_SVs(G, 'G'), **utils.get_SVs(E, 'E')})

            # If using my progbar, print metrics.
            if config['pbar'] == 'mine':
                print(', '.join(['itr: %d' % state_dict['itr']]
                                + ['%s : %+4.3f' % (key, metrics[key]) for key in metrics]),
                      end=' ')

            # Save weights and copies as configured at specified interval
            if not (state_dict['itr'] % config['save_every']):
                vae_utils.save_weights([E], state_dict, config['weights_root'],
                                       experiment_name, 'copy%d' % state_dict['save_num'],
                                       [E_ema if config['ema'] else None])
                state_dict['save_num'] = (state_dict['save_num'] + 1) % config['num_save_copies']

        # Increment epoch counter at end of epoch
        state_dict['epoch'] += 1
def run(config): timer = vae_utils.Timer() # Update the config dict as necessary # This is for convenience, to add settings derived from the user-specified # configuration into the config-dict (e.g. inferring the number of classes # and size of the images from the dataset, passing in a pytorch object # for the activation specified as a string) config['resolution'] = utils.imsize_dict[config['dataset']] config['n_classes'] = utils.nclass_dict[config['dataset']] config['G_activation'] = utils.activation_dict[config['G_nl']] config['D_activation'] = utils.activation_dict[config['D_nl']] # By default, skip init if resuming training. if config['resume']: print('Skipping initialization for training resumption...') config['skip_init'] = True config = vae_utils.update_config_roots(config) device = 'cuda' # Seed RNG utils.seed_rng(config['seed']) # Prepare root folders if necessary utils.prepare_root(config) # Setup cudnn.benchmark for free speed torch.backends.cudnn.benchmark = True # Import the model--this line allows us to dynamically select different files. experiment_name = (config['experiment_name'] if config['experiment_name'] else utils.name_from_config(config)) print('Experiment name is %s' % experiment_name) # Next, build the model E = Encoder(**{**config, 'arch': 'default'}).to(device) Out = Encoder(**{**config, 'arch': 'out'}).to(device) # If using EMA, prepare it if config['ema']: print('Preparing EMA for G with decay of {}'.format( config['ema_decay'])) E_ema = Encoder(**{ **config, 'skip_init': True, 'no_optim': True, 'arch': 'default' }).to(device) O_ema = Encoder(**{ **config, 'skip_init': True, 'no_optim': True, 'arch': 'out' }).to(device) eema = utils.ema(E, E_ema, config['ema_decay'], config['ema_start']) oema = utils.ema(Out, O_ema, config['ema_decay'], config['ema_start']) else: E_ema, eema, O_ema, oema = None, None, None, None print(E) print(Out) print('Number of params in E: {}'.format(*[ sum([p.data.nelement() for p in net.parameters()]) for net in [E, Out] ])) # Prepare state dict, which holds things like epoch # and itr # state_dict = { 'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0, 'best_IS': 0, 'best_FID': 999999, 'config': config, 'best_precise': 0.0 } # If loading from a pre-trained model, load weights if config['resume']: print('Loading weights...') vae_utils.load_weights( [E, Out], state_dict, config['weights_root'], experiment_name, config['load_weights'] if config['load_weights'] else None, [E_ema, O_ema] if config['ema'] else [None]) class Wrapper(nn.Module): def __init__(self): super(Wrapper, self).__init__() self.E = E self.O = Out def forward(self, x): x = self.E(x) x = self.O(x) return x W = Wrapper() # If parallel, parallelize the GD module if config['parallel']: W = nn.DataParallel(W) if config['cross_replica']: patch_replication_callback(W) # Prepare loggers for stats; metrics holds test metrics, # lmetrics holds any desired training metrics. 
test_metrics_fname = '%s/%s_log.jsonl' % (config['logs_root'], experiment_name) train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name) print('Inception Metrics will be saved to {}'.format(test_metrics_fname)) test_log = utils.MetricsLogger(test_metrics_fname, reinitialize=(not config['resume'])) print('Training Metrics will be saved to {}'.format(train_metrics_fname)) train_log = utils.MyLogger(train_metrics_fname, reinitialize=(not config['resume']), logstyle=config['logstyle']) # Write metadata utils.write_metadata(config['logs_root'], experiment_name, config, state_dict) # Batch size for the dataloader (base batch size x num_D_steps x num_D_accumulations) batch_size = config['batch_size'] * config['num_D_steps'] * config[ 'num_D_accumulations'] eval_loader = utils.get_data_loaders(**{ **config, 'load_in_mem': False, 'use_multiepoch_sampler': False })[0] dense_eval = vae_utils.dense_eval(2048, config['n_classes'], steps=5).to(device) eval_fn = functools.partial(vae_utils.eval_encoder, sample_batch=10, config=config, loader=eval_loader, dense_eval=dense_eval, device=device) E_scheduler = torch.optim.lr_scheduler.StepLR(E.optim, step_size=2, gamma=0.1) O_scheduler = torch.optim.lr_scheduler.StepLR(Out.optim, step_size=2, gamma=0.1) def train(w, img): E.optim.zero_grad() Out.optim.zero_grad() w_ = W(img) loss = F.mse_loss(w_, w, reduction='mean') loss.backward() if config['E_ortho'] > 0.0: # Debug print to indicate we're using ortho reg in E. print('using modified ortho reg in E') utils.ortho(E, config['E_ortho']) utils.ortho(Out, config['E_ortho']) E.optim.step() Out.optim.step() out = {'loss': float(loss.item())} if config['ema']: for ema in [eema, oema]: ema.update(state_dict['itr']) del w_, loss return out start, end = sampled_ssgan.make_dset_range(config['ssgan_sample_root'], config['ssgan_piece'], batch_size) timer.update() print( 'Beginning training at epoch %d (running time %02d day %02d h %02d min %02d sec) ...' % ((state_dict['epoch'], ) + timer.runing_time)) # Train for specified number of epochs, although we mostly track G iterations. for epoch in range(state_dict['epoch'], config['num_epochs']): for piece in range(config['ssgan_piece']): timer.update() print( 'Load %d-th piece of ssgan sample into memory (running time %02d day %02d h %02d min %02d sec)...' % ((piece, ) + timer.runing_time)) loader = sampled_ssgan.get_SSGAN_sample_loader( **{ **config, 'batch_size': batch_size, 'start_itr': state_dict['itr'], 'start': start[piece], 'end': end[piece] }) for _ in range(200): for i, (img, z, w) in enumerate(loader): # Increment the iteration counter state_dict['itr'] += 1 # Make sure G and D are in training mode, just in case they got set to eval # For D, which typically doesn't have BN, this shouldn't matter much. 
E.train() Out.train() if config['ema']: E_ema.train() O_ema.train() img, w = img.to(device), w.to(device) counter = 0 img = torch.split(img, config['batch_size']) w = torch.split(w, config['batch_size']) metrics = train(w[counter], img[counter]) counter += 1 del img, w train_log.log(itr=int(state_dict['itr']), **metrics) if not (state_dict['itr'] % 100): timer.update() print( "Running time %02d day %02d h %02d min %02d sec," % timer.runing_time + ', '.join(['itr: %d' % state_dict['itr']] + [ '%s : %+4.3f' % (key, metrics[key]) for key in metrics ])) # Save weights and copies as configured at specified interval if not (state_dict['itr'] % config['save_every']): if config['G_eval_mode']: print('Switching E to eval mode...') E.eval() if config['ema']: E_ema.eval() sampled_ssgan.save_and_eavl(E, Out, E_ema, O_ema, state_dict, config, experiment_name, eval_fn, test_log) E_scheduler.step() O_scheduler.step() del loader # Increment epoch counter at end of epoch state_dict['epoch'] += 1
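# Both encoder scripts above create utils.ema(source, target, decay, start) and call ema.update(itr) after each
# optimizer step. The helper below is a minimal sketch of that pattern, assuming (not verifying) the behavior of
# the repo's utils.ema: copy the source weights at construction, then blend them with the given decay once
# start_itr has been reached.
import torch

class EmaSketch(object):
    def __init__(self, source, target, decay=0.9999, start_itr=0):
        self.source, self.target = source, target
        self.decay, self.start_itr = decay, start_itr
        with torch.no_grad():
            for s, t in zip(source.state_dict().values(), target.state_dict().values()):
                t.copy_(s)  # start the EMA copy as an exact clone of the source

    def update(self, itr=None):
        # Before start_itr, just keep mirroring the source (decay = 0).
        decay = self.decay if (itr is None or itr >= self.start_itr) else 0.0
        with torch.no_grad():
            for s, t in zip(self.source.state_dict().values(), self.target.state_dict().values()):
                t.copy_(decay * t + (1.0 - decay) * s)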
def run(config): # Update the config dict as necessary # This is for convenience, to add settings derived from the user-specified # configuration into the config-dict (e.g. inferring the number of classes # and size of the images from the dataset, passing in a pytorch object # for the activation specified as a string) config['resolution'] = utils.imsize_dict[config['dataset']] config['n_classes'] = utils.nclass_dict[config['dataset']] config['G_activation'] = utils.activation_dict[config['G_nl']] config['D_activation'] = utils.activation_dict[config['D_nl']] # By default, skip init if resuming training. if config['resume']: print('Skipping initialization for training resumption...') config['skip_init'] = True config = utils.update_config_roots(config) device = 'cuda' num_devices = torch.cuda.device_count() # Seed RNG utils.seed_rng(config['seed']) # Prepare root folders if necessary utils.prepare_root(config) # Setup cudnn.benchmark for free speed torch.backends.cudnn.benchmark = True # Import the model--this line allows us to dynamically select different files. model = __import__(config['model']) experiment_name = (config['experiment_name'] if config['experiment_name'] else utils.name_from_config(config)) print('Experiment name is %s' % experiment_name) # Next, build the model G = model.Generator(**config).to(device) D = model.ImageDiscriminator(**config).to(device) if config['no_Dv'] == False: Dv = model.VideoDiscriminator(**config).to(device) else: Dv = None # If using EMA, prepare it if config['ema']: print('Preparing EMA for G with decay of {}'.format( config['ema_decay'])) G_ema = model.Generator(**{ **config, 'skip_init': True, 'no_optim': True }).to(device) ema = utils.ema(G, G_ema, config['ema_decay'], config['ema_start']) else: G_ema, ema = None, None # FP16? if config['G_fp16']: print('Casting G to float16...') G = G.half() if config['ema']: G_ema = G_ema.half() if config['D_fp16']: print('Casting D to fp16...') D = D.half() if config['no_Dv'] == False: Dv = Dv.half() # Consider automatically reducing SN_eps? GD = model.G_D( G, D, Dv, config['k'], config['T_into_B']) #xiaodan: add an argument k and T_into_B # print('GD.k in train.py line 91',GD.k) # print(G) # xiaodan: print disabled by xiaodan. Too many stuffs # print(D) if config['no_Dv'] == False: print('Number of params in G: {} D: {} Dv: {}'.format(*[ sum([p.data.nelement() for p in net.parameters()]) for net in [G, D, Dv] ])) else: print('Number of params in G: {} D: {}'.format(*[ sum([p.data.nelement() for p in net.parameters()]) for net in [G, D] ])) # Prepare state dict, which holds things like epoch # and itr # state_dict = { 'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0, 'best_IS': 0, 'best_FID': 999999, 'config': config } # If loading from a pre-trained BigGAN model, load weights if config['biggan_init']: print('Loading weights from pre-trained BigGAN...') utils.load_biggan_weights(G, D, state_dict, config['biggan_weights_root'], G_ema if config['ema'] else None, load_optim=False) # If loading from a pre-trained model, load weights if config['resume']: print('Loading weights...') utils.load_weights( G, D, Dv, state_dict, config['weights_root'], experiment_name, config['load_weights'] if config['load_weights'] else None, G_ema if config['ema'] else None) # If parallel, parallelize the GD module if config['parallel']: GD = nn.DataParallel(GD) if config['cross_replica']: patch_replication_callback(GD) # Prepare loggers for stats; metrics holds test metrics, # lmetrics holds any desired training metrics. 
test_metrics_fname = '%s/%s_log.jsonl' % (config['logs_root'], experiment_name) train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name) print('Inception Metrics will be saved to {}'.format(test_metrics_fname)) test_log = utils.MetricsLogger(test_metrics_fname, reinitialize=(not config['resume'])) print('Training Metrics will be saved to {}'.format(train_metrics_fname)) train_log = utils.MyLogger(train_metrics_fname, reinitialize=(not config['resume']), logstyle=config['logstyle']) # Write metadata utils.write_metadata(config['logs_root'], experiment_name, config, state_dict) # Prepare data; the Discriminator's batch size is all that needs to be passed # to the dataloader, as G doesn't require dataloading. # Note that at every loader iteration we pass in enough data to complete # a full D iteration (regardless of number of D steps and accumulations) D_batch_size = (config['batch_size'] * config['num_D_steps'] * config['num_D_accumulations']) if config['dataset'] == 'C10': loaders = utils.get_video_cifar_data_loader( **{ **config, 'batch_size': D_batch_size, 'start_itr': state_dict['itr'] }) else: loaders = utils.get_video_data_loaders(**{ **config, 'batch_size': D_batch_size, 'start_itr': state_dict['itr'] }) # print(loaders) # print(loaders[0]) print('D loss weight:', config['D_loss_weight']) # Prepare inception metrics: FID and IS if config['skip_testing'] == False: get_inception_metrics = inception_utils.prepare_inception_metrics( config['dataset'], config['parallel'], config['no_fid']) # Prepare noise and randomly sampled label arrays # Allow for different batch sizes in G G_batch_size = max( config['G_batch_size'], config['batch_size'] ) # * num_devices #xiaodan: num_devices added by xiaodan # print('num_devices:',num_devices,'G_batch_size:',G_batch_size) z_, y_ = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) # print('z_,y_ shapes after prepare_z_y:',z_.shape,y_.shape) # print('z_,y_ size:',z_.shape,y_.shape) # print('G.dim_z:',G.dim_z) # Prepare a fixed z & y to see individual sample evolution throghout training fixed_z, fixed_y = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) fixed_z.sample_() fixed_y.sample_() # Loaders are loaded, prepare the training function if config['which_train_fn'] == 'GAN': train = train_fns.GAN_training_function(G, D, Dv, GD, z_, y_, ema, state_dict, config) # Else, assume debugging and use the dummy train fn else: train = train_fns.dummy_training_function() # Prepare Sample function for use with inception metrics sample = functools.partial( utils.sample, G=(G_ema if config['ema'] and config['use_ema'] else G), z_=z_, y_=y_, config=config) print('Beginning training at epoch %d...' % state_dict['epoch']) unique_id = datetime.datetime.now().strftime('%Y%m-%d%H-%M%S-') tensorboard_path = os.path.join(config['logs_root'], 'tensorboard_logs', unique_id) os.makedirs(tensorboard_path) # Train for specified number of epochs, although we mostly track G iterations. writer = SummaryWriter(log_dir=tensorboard_path) for epoch in range(state_dict['epoch'], config['num_epochs']): # Which progressbar to use? TQDM or my own? 
if config['pbar'] == 'mine': pbar = utils.progress(loaders[0], displaytype='s1k' if config['use_multiepoch_sampler'] else 'eta') else: pbar = tqdm(loaders[0]) iteration = epoch * len(pbar) for i, (x, y) in enumerate(pbar): # Increment the iteration counter state_dict['itr'] += 1 # Make sure G and D are in training mode, just in case they got set to eval # For D, which typically doesn't have BN, this shouldn't matter much. G.train() D.train() if config['no_Dv'] == False: Dv.train() if config['ema']: G_ema.train() if config['D_fp16']: x, y = x.to(device).half(), y.to(device) else: x, y = x.to(device), y.to(device) metrics = train(x, y, writer, iteration + i) train_log.log(itr=int(state_dict['itr']), **metrics) # Every sv_log_interval, log singular values if (config['sv_log_interval'] > 0) and ( not (state_dict['itr'] % config['sv_log_interval'])): if config['no_Dv'] == False: train_log.log(itr=int(state_dict['itr']), **{ **utils.get_SVs(G, 'G'), **utils.get_SVs(D, 'D'), **utils.get_SVs(Dv, 'Dv') }) else: train_log.log(itr=int(state_dict['itr']), **{ **utils.get_SVs(G, 'G'), **utils.get_SVs(D, 'D') }) # If using my progbar, print metrics. if config['pbar'] == 'mine': print(', '.join( ['itr: %d' % state_dict['itr']] + ['%s : %+4.3f' % (key, metrics[key]) for key in metrics]), end=' ') # Save weights and copies as configured at specified interval if not (state_dict['itr'] % config['save_every']): if config['G_eval_mode']: print('Switchin G to eval mode...') G.eval() if config['ema']: G_ema.eval() train_fns.save_and_sample(G, D, Dv, G_ema, z_, y_, fixed_z, fixed_y, state_dict, config, experiment_name) #xiaodan: Disabled test for now because we don't have inception data # Test every specified interval if not (state_dict['itr'] % config['test_every']) and config['skip_testing'] == False: if config['G_eval_mode']: print('Switchin G to eval mode...') G.eval() IS_mean, IS_std, FID = train_fns.test( G, D, Dv, G_ema, z_, y_, state_dict, config, sample, get_inception_metrics, experiment_name, test_log) writer.add_scalar('Inception/IS', IS_mean, iteration + i) writer.add_scalar('Inception/IS_std', IS_std, iteration + i) writer.add_scalar('Inception/FID', FID, iteration + i) # Increment epoch counter at end of epoch state_dict['epoch'] += 1
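# The loops above draw fresh latents and labels by calling z_.sample_() / y_.sample_() on tensors returned by
# utils.prepare_z_y, i.e. buffers that are allocated once and refreshed in place. The sketch below shows that
# pattern under simplifying assumptions (no fp16 casting, plain normal/categorical draws); the repo's helper
# returns richer Distribution objects.
import torch

def prepare_z_y_sketch(batch_size, dim_z, n_classes, device='cuda'):
    z = torch.randn(batch_size, dim_z, device=device)               # standard-normal latents
    y = torch.randint(0, n_classes, (batch_size,), device=device)   # uniformly sampled class labels
    return z, y

def resample_(z, y, n_classes):
    # In-place refresh, analogous to z_.sample_() and y_.sample_() in the scripts above.
    z.normal_()
    y.random_(0, n_classes)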
def run(config): logger = logging.getLogger('tl') # Update the config dict as necessary # This is for convenience, to add settings derived from the user-specified # configuration into the config-dict (e.g. inferring the number of classes # and size of the images from the dataset, passing in a pytorch object # for the activation specified as a string) config['resolution'] = utils.imsize_dict[config['dataset']] config['n_classes'] = utils.nclass_dict[config['dataset']] config['G_activation'] = utils.activation_dict[config['G_nl']] config['D_activation'] = utils.activation_dict[config['D_nl']] # By default, skip init if resuming training. if config['resume']: print('Skipping initialization for training resumption...') config['skip_init'] = True config = utils.update_config_roots(config) device = 'cuda' # Seed RNG utils.seed_rng(config['seed']) # Prepare root folders if necessary utils.prepare_root(config) # Setup cudnn.benchmark for free speed torch.backends.cudnn.benchmark = True # Import the model--this line allows us to dynamically select different files. model = importlib.import_module(config['model']) # model = __import__(config['model']) experiment_name = 'exp' # experiment_name = (config['experiment_name'] if config['experiment_name'] # else utils.name_from_config(config)) print('Experiment name is %s' % experiment_name) # Next, build the model G = model.Generator(**config, cfg=getattr(global_cfg, 'generator', None)).to(device) D = model.Discriminator(**config, cfg=getattr(global_cfg, 'discriminator', None)).to(device) # If using EMA, prepare it if config['ema']: print('Preparing EMA for G with decay of {}'.format( config['ema_decay'])) G_ema = model.Generator(**{ **config, 'skip_init': True, 'no_optim': True }, cfg=getattr(global_cfg, 'generator', None)).to(device) ema = utils.ema(G, G_ema, config['ema_decay'], config['ema_start']) else: G_ema, ema = None, None # FP16? if config['G_fp16']: print('Casting G to float16...') G = G.half() if config['ema']: G_ema = G_ema.half() if config['D_fp16']: print('Casting D to fp16...') D = D.half() # Consider automatically reducing SN_eps? GD = model.G_D(G, D) logger.info(G) logger.info(D) logger.info('Number of params in G: {} D: {}'.format( * [sum([p.data.nelement() for p in net.parameters()]) for net in [G, D]])) # Prepare state dict, which holds things like epoch # and itr # state_dict = { 'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0, 'best_IS': 0, 'best_FID': 999999, 'config': config } # If loading from a pre-trained model, load weights if config['resume']: print('Loading weights...') utils.load_weights(G=G, D=D, state_dict=state_dict, weights_root=global_cfg.resume_cfg.weights_root, experiment_name='', name_suffix=config['load_weights'] if config['load_weights'] else None, G_ema=G_ema if config['ema'] else None) logger.info(f"Resume IS={state_dict['best_IS']}") logger.info(f"Resume FID={state_dict['best_FID']}") # If parallel, parallelize the GD module if config['parallel']: GD = nn.DataParallel(GD) if config['cross_replica']: patch_replication_callback(GD) # Prepare loggers for stats; metrics holds test metrics, # lmetrics holds any desired training metrics. 
test_metrics_fname = '%s/%s_log.jsonl' % (config['logs_root'], experiment_name) train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name) print('Inception Metrics will be saved to {}'.format(test_metrics_fname)) test_log = utils.MetricsLogger(test_metrics_fname, reinitialize=(not config['resume'])) print('Training Metrics will be saved to {}'.format(train_metrics_fname)) train_log = utils.MyLogger(train_metrics_fname, reinitialize=(not config['resume']), logstyle=config['logstyle']) # Write metadata utils.write_metadata(config['logs_root'], experiment_name, config, state_dict) # Prepare data; the Discriminator's batch size is all that needs to be passed # to the dataloader, as G doesn't require dataloading. # Note that at every loader iteration we pass in enough data to complete # a full D iteration (regardless of number of D steps and accumulations) D_batch_size = (config['batch_size'] * config['num_D_steps'] * config['num_D_accumulations']) loaders = utils.get_data_loaders( **{ **config, 'batch_size': D_batch_size, 'start_itr': state_dict['itr'], **getattr(global_cfg, 'train_dataloader', {}) }) val_loaders = None if hasattr(global_cfg, 'val_dataloader'): val_loaders = utils.get_data_loaders( **{ **config, 'batch_size': config['batch_size'], 'start_itr': state_dict['itr'], **global_cfg.val_dataloader })[0] val_loaders = iter(val_loaders) # Prepare inception metrics: FID and IS if global_cfg.get('use_unofficial_FID', False): get_inception_metrics = inception_utils.prepare_inception_metrics( config['inception_file'], config['parallel'], config['no_fid']) else: get_inception_metrics = inception_utils.prepare_FID_IS(global_cfg) # Prepare noise and randomly sampled label arrays # Allow for different batch sizes in G G_batch_size = max(config['G_batch_size'], config['batch_size']) z_, y_ = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) # Prepare a fixed z & y to see individual sample evolution throghout training fixed_z, fixed_y = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) fixed_z.sample_() fixed_y.sample_() # Loaders are loaded, prepare the training function if config['which_train_fn'] == 'GAN': train = train_fns.GAN_training_function(G, D, GD, z_, y_, ema, state_dict, config, val_loaders) # Else, assume debugging and use the dummy train fn elif config['which_train_fn'] == 'dummy': train = train_fns.dummy_training_function() else: train_fns_module = importlib.import_module(config['which_train_fn']) train = train_fns_module.GAN_training_function(G, D, GD, z_, y_, ema, state_dict, config, val_loaders) # Prepare Sample function for use with inception metrics if global_cfg.get('use_unofficial_FID', False): sample = functools.partial( utils.sample, G=(G_ema if config['ema'] and config['use_ema'] else G), z_=z_, y_=y_, config=config) else: sample = functools.partial( utils.sample_imgs, G=(G_ema if config['ema'] and config['use_ema'] else G), z_=z_, y_=y_, config=config) state_dict['shown_images'] = state_dict['itr'] * D_batch_size if global_cfg.get('resume_cfg', {}).get('eval', False): logger.info(f'Evaluating model.') G_ema.eval() G.eval() train_fns.test(G, D, G_ema, z_, y_, state_dict, config, sample, get_inception_metrics, experiment_name, test_log) return print('Beginning training at epoch %d...' % state_dict['epoch']) # Train for specified number of epochs, although we mostly track G iterations. 
for epoch in range(state_dict['epoch'], config['num_epochs']): # Which progressbar to use? TQDM or my own? if config['pbar'] == 'mine': pbar = utils.progress(loaders[0], desc=f'Epoch:{epoch}, Itr: ', displaytype='s1k' if config['use_multiepoch_sampler'] else 'eta') else: pbar = tqdm(loaders[0]) for i, (x, y) in enumerate(pbar): # Increment the iteration counter state_dict['itr'] += 1 # Make sure G and D are in training mode, just in case they got set to eval # For D, which typically doesn't have BN, this shouldn't matter much. G.train() D.train() if config['ema']: G_ema.train() if config['D_fp16']: x, y = x.to(device).half(), y.to(device) else: x, y = x.to(device), y.to(device) default_dict = train(x, y) state_dict['shown_images'] += D_batch_size metrics = default_dict['D_loss'] train_log.log(itr=int(state_dict['itr']), **metrics) summary_defaultdict2txtfig(default_dict=default_dict, prefix='train', step=state_dict['shown_images'], textlogger=textlogger) # Every sv_log_interval, log singular values if (config['sv_log_interval'] > 0) and ( not (state_dict['itr'] % config['sv_log_interval'])): train_log.log(itr=int(state_dict['itr']), **{ **utils.get_SVs(G, 'G'), **utils.get_SVs(D, 'D') }) # If using my progbar, print metrics. if config['pbar'] == 'mine': print(', '.join( ['itr: %d' % state_dict['itr']] + ['%s : %+4.3f' % (key, metrics[key]) for key in metrics]), end=' ', flush=True) # Save weights and copies as configured at specified interval if not (state_dict['itr'] % config['save_every']): if config['G_eval_mode']: print('Switchin G to eval mode...') G.eval() if config['ema']: G_ema.eval() train_fns.save_and_sample(G, D, G_ema, z_, y_, fixed_z, fixed_y, state_dict, config, experiment_name) # Test every specified interval if state_dict['itr'] == 1 or \ (config['test_every'] > 0 and state_dict['itr'] % config['test_every'] == 0) or \ (state_dict['shown_images'] % global_cfg.get('test_every_images', float('inf'))) < D_batch_size: if config['G_eval_mode']: print('Switchin G to eval mode...', flush=True) G.eval() print('\n' + config['tl_outdir']) train_fns.test(G, D, G_ema, z_, y_, state_dict, config, sample, get_inception_metrics, experiment_name, test_log) # Increment epoch counter at end of epoch state_dict['epoch'] += 1
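# The scripts above obtain FID/IS through inception_utils.prepare_inception_metrics or prepare_FID_IS. For
# reference, once the pooled Inception features of real and generated samples have been reduced to Gaussian
# statistics (mu, sigma), the FID itself is the Frechet distance below. This is a generic sketch, not the
# repo's implementation (which may, for example, compute the matrix square root in PyTorch).
import numpy as np
from scipy import linalg

def fid_sketch(mu1, sigma1, mu2, sigma2, eps=1e-6):
    # FID = ||mu1 - mu2||^2 + Tr(sigma1 + sigma2 - 2 (sigma1 sigma2)^{1/2})
    diff = mu1 - mu2
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        offset = np.eye(sigma1.shape[0]) * eps  # stabilize near-singular covariances
        covmean, _ = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset), disp=False)
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    return float(diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2.0 * np.trace(covmean))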
def run(config): # Update the config dict as necessary # This is for convenience, to add settings derived from the user-specified # configuration into the config-dict (e.g. inferring the number of classes # and size of the images from the dataset, passing in a pytorch object # for the activation specified as a string) config['resolution'] = utils.imsize_dict[config['dataset']] config['n_classes'] = utils.nclass_dict[config['dataset']] config['G_activation'] = utils.activation_dict[config['G_nl']] config['D_activation'] = utils.activation_dict[config['D_nl']] # By default, skip init if resuming training. if config['resume']: print('Skipping initialization for training resumption...') config['skip_init'] = True config = vae_utils.update_config_roots(config) device = 'cuda' # Seed RNG utils.seed_rng(config['seed']) # Prepare root folders if necessary utils.prepare_root(config) # Setup cudnn.benchmark for free speed torch.backends.cudnn.benchmark = True experiment_name = (config['experiment_name'] if config['experiment_name'] else utils.name_from_config(config)) print('Experiment name is %s' % experiment_name) # Next, build the model G = BigGAN.Generator(**{ **config, 'skip_init': True, 'no_optim': True }).to(device) D = BigGAN.Discriminator(**{ **config, 'skip_init': True, 'no_optim': True }).to(device) E = Encoder(**config).to(device) vgg_alter = Encoder(**{ **config, 'skip_init': True, 'no_optim': True, 'name': 'Vgg_alter' }).to(device) load_pretrained(G, config['pretrained_G_dir']) load_pretrained(D, config['pretrained_D_dir']) load_pretrained(vgg_alter, config['pretrained_vgg_alter_dir']) # If using EMA, prepare it if config['ema']: print('Preparing EMA for G with decay of {}'.format( config['ema_decay'])) E_ema = Encoder(**{ **config, 'skip_init': True, 'no_optim': True }).to(device) ema = utils.ema(E, E_ema, config['ema_decay'], config['ema_start']) else: E_ema, ema = None, None class TrainWarpper(nn.Module): def __init__(self): super(TrainWarpper, self).__init__() self.G = G self.D = D self.E = E self.vgg_alter = vgg_alter def forward(self, img, label): en_w = self.E(img) with torch.no_grad(): fake = self.G(en_w, self.G.shared(label)) logits = self.D(fake, label) vgg_logits = F.l1_loss(self.vgg_alter(img), self.vgg_alter(fake)) return fake, logits, vgg_logits Wrapper = TrainWarpper() print(G) print(D) print(E) print(vgg_alter) print('Number of params in G: {} D: {} E: {} Vgg_alter: {}'.format(*[ sum([p.data.nelement() for p in net.parameters()]) for net in [G, D, E, vgg_alter] ])) # Prepare state dict, which holds things like epoch # and itr # state_dict = { 'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0, 'best_IS': 0, 'best_FID': 999999, 'config': config } # If loading from a pre-trained model, load weights if config['resume']: print('Loading weights...') vae_utils.load_weights( [E], state_dict, config['weights_root'], experiment_name, config['load_weights'] if config['load_weights'] else None, [E_ema if config['ema'] else None]) # If parallel, parallelize the GD module if config['parallel']: Wrapper = nn.DataParallel(Wrapper) if config['cross_replica']: patch_replication_callback(Wrapper) # Prepare loggers for stats; metrics holds test metrics, # lmetrics holds any desired training metrics. 
test_metrics_fname = '%s/%s_log.jsonl' % (config['logs_root'], experiment_name) train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name) print('Inception Metrics will be saved to {}'.format(test_metrics_fname)) test_log = utils.MetricsLogger(test_metrics_fname, reinitialize=(not config['resume'])) print('Training Metrics will be saved to {}'.format(train_metrics_fname)) train_log = utils.MyLogger(train_metrics_fname, reinitialize=(not config['resume']), logstyle=config['logstyle']) # Write metadata utils.write_metadata(config['logs_root'], experiment_name, config, state_dict) # Prepare data; the Discriminator's batch size is all that needs to be passed # to the dataloader, as G doesn't require dataloading. # Note that at every loader iteration we pass in enough data to complete # a full D iteration (regardless of number of D steps and accumulations) D_batch_size = (config['batch_size'] * config['num_D_steps'] * config['num_D_accumulations']) loaders = utils.get_data_loaders(**{ **config, 'batch_size': D_batch_size, 'start_itr': state_dict['itr'] }) G_batch_size = max(config['G_batch_size'], config['batch_size']) fixed_x, fixed_y = vae_utils.prepare_fixed_x(loaders[0], G_batch_size, config, experiment_name, device) # Prepare noise and randomly sampled label arrays def train(img, label): E.optim.zero_grad() img = torch.split(img, config['batch_size']) label = torch.split(label, config['batch_size']) counter = 0 for step_index in range(config['num_D_steps']): E.optim.zero_grad() fake, logits, vgg_loss = Wrapper(img[counter], label[counter]) vgg_loss = vgg_loss * config['vgg_loss_scale'] d_loss = losses.generator_loss(logits) * config['adv_loss_scale'] recon_loss = losses.recon_loss( fakes=fake, reals=img[counter]) * config['recon_loss_scale'] loss = d_loss + recon_loss + vgg_loss loss.backward() counter += 1 if config['E_ortho'] > 0.0: # Debug print to indicate we're using ortho reg in D. print('using modified ortho reg in D') utils.ortho(D, config['E_ortho']) E.optim.step() out = { 'Vgg_loss': float(vgg_loss.item()), 'D_loss': float(d_loss.item()), 'pixel_loss': float(recon_loss.item()) } return out print('Beginning training at epoch %d...' % state_dict['epoch']) # Train for specified number of epochs, although we mostly track G iterations. for epoch in range(state_dict['epoch'], config['num_epochs']): # Which progressbar to use? TQDM or my own? if config['pbar'] == 'mine': pbar = utils.progress(loaders[0], displaytype='s1k' if config['use_multiepoch_sampler'] else 'eta') else: pbar = tqdm(loaders[0]) for i, (x, y) in enumerate(pbar): # Increment the iteration counter state_dict['itr'] += 1 # Make sure G and D are in training mode, just in case they got set to eval # For D, which typically doesn't have BN, this shouldn't matter much. G.train() D.train() E.train() vgg_alter.train() if config['ema']: E_ema.train() if config['D_fp16']: x, y = x.to(device).half(), y.to(device) else: x, y = x.to(device), y.to(device) metrics = train(x, y) train_log.log(itr=int(state_dict['itr']), **metrics) # Every sv_log_interval, log singular values if (config['sv_log_interval'] > 0) and ( not (state_dict['itr'] % config['sv_log_interval'])): train_log.log(itr=int(state_dict['itr']), **{**utils.get_SVs(E, 'E')}) # If using my progbar, print metrics. 
if config['pbar'] == 'mine': print(', '.join( ['itr: %d' % state_dict['itr']] + ['%s : %+4.3f' % (key, metrics[key]) for key in metrics]), end=' ') # Save weights and copies as configured at specified interval if not (state_dict['itr'] % config['save_every']): if config['G_eval_mode']: print('Switching G to eval mode...') G.eval() E.eval() if config['ema']: E_ema.eval() save_and_sample(G, E, E_ema, fixed_x, fixed_y, state_dict, config, experiment_name) # Increment epoch counter at end of epoch state_dict['epoch'] += 1
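# The encoder-inversion script above optimizes E with three weighted terms: a pixel reconstruction loss
# (losses.recon_loss), a perceptual loss computed through the frozen vgg_alter network, and an adversarial
# generator-style loss on D's logits (losses.generator_loss). The sketch below shows one common way such an
# objective is assembled; the exact loss forms (L1 vs. MSE, hinge vs. non-saturating) are assumptions, not the
# repo's definitions.
import torch.nn.functional as F

def inversion_loss_sketch(fake, real, d_logits_fake, feat_fn,
                          recon_scale=1.0, vgg_scale=1.0, adv_scale=1.0):
    recon = F.l1_loss(fake, real) * recon_scale                       # pixel-space reconstruction
    perceptual = F.l1_loss(feat_fn(fake), feat_fn(real)) * vgg_scale  # feature-space (perceptual) term
    adv = -d_logits_fake.mean() * adv_scale                           # hinge-style generator loss
    return recon + perceptual + adv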
def run(config): # Update the config dict as necessary # This is for convenience, to add settings derived from the user-specified # configuration into the config-dict (e.g. inferring the number of classes # and size of the images from the dataset, passing in a pytorch object # for the activation specified as a string) config['resolution'] = utils.imsize_dict[config['dataset']] config['n_classes'] = utils.nclass_dict[config['dataset']] config['G_activation'] = utils.activation_dict[config['G_nl']] config['D_activation'] = utils.activation_dict[config['D_nl']] # By default, skip init if resuming training. if config['resume']: print('Skipping initialization for training resumption...') config['skip_init'] = True config = utils.update_config_roots(config) device = 'cuda' # Seed RNG utils.seed_rng(config['seed']) # Prepare root folders if necessary utils.prepare_root(config) # Setup cudnn.benchmark for free speed torch.backends.cudnn.benchmark = True # Import the model--this line allows us to dynamically select different files. model = __import__(config['model']) experiment_name = (config['experiment_name'] if config['experiment_name'] else utils.name_from_config(config)) print('Experiment name is %s' % experiment_name) # Next, build the model D = model.Discriminator(**config).to(device) # FP16? if config['D_fp16']: print('Casting D to fp16...') D = D.half() # Consider automatically reducing SN_eps? print(D) # Prepare state dict, which holds things like epoch # and itr # state_dict = {'itr': 0, 'epoch': 0, 'config': config} # If parallel, parallelize the GD module if config['parallel']: D = nn.DataParallel(D) if config['cross_replica']: patch_replication_callback(D) # Prepare loggers for stats; metrics holds test metrics, # lmetrics holds any desired training metrics. train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name) print('Training Metrics will be saved to {}'.format(train_metrics_fname)) train_log = utils.MyLogger(train_metrics_fname, reinitialize=(not config['resume']), logstyle=config['logstyle']) # set tensorboard logger tb_logdir = '%s/%s/tblogs' % (config['logs_root'], experiment_name) if os.path.exists(tb_logdir): for filename in os.listdir(tb_logdir): if filename.startswith('events'): os.remove(os.path.join(tb_logdir, filename)) # remove previous event logs tb_writer = SummaryWriter(log_dir=tb_logdir) # Write metadata utils.write_metadata(config['logs_root'], experiment_name, config, state_dict) # Prepare data; the Discriminator's batch size is all that needs to be passed # to the dataloader, as G doesn't require dataloading. # Note that at every loader iteration we pass in enough data to complete # a full D iteration (regardless of number of D steps and accumulations) D_batch_size = (config['batch_size'] * config['num_D_steps'] * config['num_D_accumulations']) loaders = utils.get_data_loaders(**{ **config, 'batch_size': D_batch_size, 'start_itr': state_dict['itr'] }) # Prepare inception metrics: FID and IS get_inception_metrics = inception_utils.prepare_inception_metrics( config['dataset'], config['parallel'], config['no_fid']) # Loaders are loaded, prepare the training function if config['which_train_fn'] == 'MINE': train = train_fns.MINE_training_function(D, state_dict, config) # Else, assume debugging and use the dummy train fn else: train = train_fns.dummy_training_function() print('Beginning training at epoch %d...' % state_dict['epoch']) # Train for specified number of epochs, although we mostly track G iterations. 
for epoch in range(state_dict['epoch'], config['num_epochs']): # Which progressbar to use? TQDM or my own (mine, ok)? if config['pbar'] == 'mine': pbar = utils.progress(loaders[0], displaytype='s1k' if config['use_multiepoch_sampler'] else 'eta') else: pbar = tqdm(loaders[0]) for i, (x, y) in enumerate(pbar): # Increment the iteration counter state_dict['itr'] += 1 # Make sure G and D are in training mode, just in case they got set to eval # For D, which typically doesn't have BN, this shouldn't matter much. D.train() if config['D_fp16']: x, y = x.to(device).half(), y.to(device) else: x, y = x.to(device), y.to(device) metrics = train(x, y) print(metrics) train_log.log(itr=int(state_dict['itr']), **metrics) for metric_name in metrics: tb_writer.add_scalar('Train/%s' % metric_name, metrics[metric_name], state_dict['itr']) # If using my progbar, print metrics. if config['pbar'] == 'mine': print(', '.join( ['itr: %d' % state_dict['itr']] + ['%s : %+4.3f' % (key, metrics[key]) for key in metrics]), end=' ') # Increment epoch counter at end of epoch state_dict['epoch'] += 1
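# The script above delegates to train_fns.MINE_training_function(D, state_dict, config). MINE-style estimators
# train a statistics network T to maximize the Donsker-Varadhan lower bound on mutual information. The sketch
# below states that bound under the assumption of a callable T(x, y) that returns one score per sample; the
# repo's training function likely adds further tricks (e.g. a moving-average denominator to reduce gradient bias).
import math
import torch

def mine_lower_bound_sketch(T, x, y):
    # E_{p(x,y)}[T(x,y)] - log E_{p(x)p(y)}[exp(T(x,y'))], with y' a shuffled copy of y.
    joint = T(x, y).mean()
    y_shuffled = y[torch.randperm(y.size(0), device=y.device)]
    t_marg = T(x, y_shuffled).flatten()
    log_mean_exp = torch.logsumexp(t_marg, dim=0) - math.log(t_marg.numel())
    return joint - log_mean_exp  # maximize this estimate (use its negative as the loss)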
def run(config): # Update the config dict as necessary # This is for convenience, to add settings derived from the user-specified # configuration into the config-dict (e.g. inferring the number of classes # and size of the images from the dataset, passing in a pytorch object # for the activation specified as a string) config['resolution'] = utils.imsize_dict[config['dataset']] config['n_classes'] = utils.nclass_dict[config['dataset']] config['G_activation'] = utils.activation_dict[config['G_nl']] config['D_activation'] = utils.activation_dict[config['D_nl']] # By default, skip init if resuming training. if config['resume']: print('Skipping initialization for training resumption...') config['skip_init'] = True config = utils.update_config_roots(config) device = 'cuda' # Seed RNG utils.seed_rng(config['seed']) # Prepare root folders if necessary utils.prepare_root(config) # Setup cudnn.benchmark for free speed torch.backends.cudnn.benchmark = True # Import the model--this line allows us to dynamically select different files. model = __import__(config['model']) experiment_name = (config['experiment_name'] if config['experiment_name'] else utils.name_from_config(config)) print('Experiment name is %s' % experiment_name) # Next, build the model G = model.Generator(**config).to(device) D = model.Discriminator(**config).to(device) E = model.ImgEncoder(**config).to(device) # E = model.Encoder(**config).to(device) # If using EMA, prepare it if config['ema']: print('Preparing EMA for G with decay of {}'.format( config['ema_decay'])) G_ema = model.Generator(**{**config, 'skip_init': True, 'no_optim': True}).to(device) ema = utils.ema(G, G_ema, config['ema_decay'], config['ema_start']) else: G_ema, ema = None, None # FP16? if config['G_fp16']: print('Casting G to float16...') G = G.half() if config['ema']: G_ema = G_ema.half() if config['D_fp16']: print('Casting D to fp16...') D = D.half() # Consider automatically reducing SN_eps? GDE = model.G_D_E(G, D, E) print('Number of params in G: {} D: {} E: {}'.format( *[sum([p.data.nelement() for p in net.parameters()]) for net in [G, D, E]])) # Prepare state dict, which holds things like epoch # and itr # state_dict = {'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0, 'best_IS': 0, 'best_FID': 999999, 'config': config} # If loading from a pre-trained model, load weights if config['resume']: print('Loading weights...') utils.load_weights(G, D, E, state_dict, config['weights_root'], experiment_name, config['load_weights'] if config['load_weights'] else None, G_ema if config['ema'] else None) # If parallel, parallelize the GD module if config['parallel']: GDE = nn.DataParallel(GDE) if config['cross_replica']: patch_replication_callback(GDE) # Prepare data; the Discriminator's batch size is all that needs to be passed # to the dataloader, as G doesn't require dataloading. 
# Note that at every loader iteration we pass in enough data to complete # a full D iteration (regardless of number of D steps and accumulations) D_batch_size = (config['batch_size'] * config['num_D_steps'] * config['num_D_accumulations']) loaders, train_dataset = utils.get_data_loaders(**{**config, 'batch_size': D_batch_size, 'start_itr': state_dict['itr']}) # Prepare noise and randomly sampled label arrays # Allow for different batch sizes in G G_batch_size = max(config['G_batch_size'], config['batch_size']) z_, y_ = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) # Prepare a fixed z & y to see individual sample evolution throghout training fixed_z, fixed_y = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) fixed_z.sample_() fixed_y.sample_() print("fixed_y original: {} {}".format(fixed_y.shape, fixed_y[:10])) ## TODO: change the sample method to sample x and y fixed_x, fixed_y_of_x = utils.prepare_x_y(G_batch_size, train_dataset, experiment_name, config, device=device) # Build image pool to prevent mode collapes if config['img_pool_size'] != 0: img_pool = ImagePool(config['img_pool_size'], train_dataset.num_class,\ save_dir=os.path.join(config['imgbuffer_root'], experiment_name), resume_buffer=config['resume_buffer']) else: img_pool = None # Loaders are loaded, prepare the training function if config['which_train_fn'] == 'GAN': train = train_fns.GAN_training_function(G, D, E, GDE, ema, state_dict, config, img_pool) # Else, assume debugging and use the dummy train fn else: train = train_fns.dummy_training_function() # Prepare Sample function for use with inception metrics sample = functools.partial(utils.sample, G=(G_ema if config['ema'] and config['use_ema'] else G), z_=z_, y_=y_, config=config) # print('Beginning training at epoch %f...' % (state_dict['itr'] * D_batch_size / len(train_dataset))) print("Beginning testing at Epoch {} (iteration {})".format(state_dict['epoch'], state_dict['itr'])) if config['G_eval_mode']: print('Switchin G to eval mode...') G.eval() if config['ema']: G_ema.eval() # vc visualization # # print("VC visualization ===============") # activation_extract(G, D, E, G_ema, fixed_x, fixed_y_of_x, z_, y_, # state_dict, config, experiment_name, device, normal_eval=False, eval_vc=True, return_mask=False) # normal activation print("Normal activation ===============") activation_extract(G, D, E, G_ema, fixed_x, fixed_y_of_x, z_, y_, state_dict, config, experiment_name, device, normal_eval=True, eval_vc=False, return_mask=False) # produce normal fully activated images
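# The script above optionally wraps generated images in an ImagePool "to prevent mode collapse", i.e. a replay
# buffer that sometimes shows the discriminator previously generated samples instead of only the freshest ones.
# Below is a minimal CycleGAN-style pool as a sketch; the repo's ImagePool (class-aware, disk-backed, resumable)
# is assumed to be considerably richer.
import random
import torch

class ImagePoolSketch:
    def __init__(self, pool_size=50):
        self.pool_size = pool_size
        self.images = []

    def query(self, images):
        # Return a batch in which each image is either the fresh fake or a stored older fake.
        if self.pool_size == 0:
            return images
        out = []
        for img in images:
            img = img.unsqueeze(0)
            if len(self.images) < self.pool_size:
                self.images.append(img)               # still filling the pool: keep and return the fresh image
                out.append(img)
            elif random.random() > 0.5:
                idx = random.randrange(len(self.images))
                out.append(self.images[idx].clone())  # return an old image ...
                self.images[idx] = img                # ... and store the fresh one in its place
            else:
                out.append(img)
        return torch.cat(out, dim=0)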
def run(config): # Update the config dict as necessary # This is for convenience, to add settings derived from the user-specified # configuration into the config-dict (e.g. inferring the number of classes # and size of the images from the dataset, passing in a pytorch object # for the activation specified as a string) config['resolution'] = utils.imsize_dict[config['dataset']] config['n_classes'] = utils.nclass_dict[config['dataset']] config['G_activation'] = utils.activation_dict[config['G_nl']] config['D_activation'] = utils.activation_dict[config['D_nl']] # By default, skip init if resuming training. if config['resume']: print('Skipping initialization for training resumption...') config['skip_init'] = True config = utils.update_config_roots(config) device = 'cuda' if config['base_root']: os.makedirs(config['base_root'],exist_ok=True) # Seed RNG utils.seed_rng(config['seed']) # Prepare root folders if necessary utils.prepare_root(config) # Setup cudnn.benchmark for free speed torch.backends.cudnn.benchmark = True # Import the model--this line allows us to dynamically select different files. model = __import__(config['model']) experiment_name = (config['experiment_name'] if config['experiment_name'] else utils.name_from_config(config)) print('Experiment name is %s' % experiment_name) # Next, build the model G = model.Generator(**config).to(device) D = model.Discriminator(**config).to(device) # If using EMA, prepare it if config['ema']: print('Preparing EMA for G with decay of {}'.format(config['ema_decay'])) G_ema = model.Generator(**{**config, 'skip_init':True, 'no_optim': True}).to(device) ema = utils.ema(G, G_ema, config['ema_decay'], config['ema_start']) else: G_ema, ema = None, None # FP16? if config['G_fp16']: print('Casting G to float16...') G = G.half() if config['ema']: G_ema = G_ema.half() if config['D_fp16']: print('Casting D to fp16...') D = D.half() # Consider automatically reducing SN_eps? GD = model.G_D(G, D) print(G) print(D) print('Number of params in G: {} D: {}'.format( *[sum([p.data.nelement() for p in net.parameters()]) for net in [G,D]])) # Prepare state dict, which holds things like epoch # and itr # state_dict = {'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0, 'best_IS': 0, 'best_FID': 999999, 'config': config} # If loading from a pre-trained model, load weights if config['resume']: print('Loading weights...') utils.load_weights(G, D, state_dict, config['weights_root'], experiment_name, config['load_weights'] if config['load_weights'] else None, G_ema if config['ema'] else None, ) if G.lr_sched is not None:G.lr_sched.step(state_dict['epoch']) if D.lr_sched is not None:D.lr_sched.step(state_dict['epoch']) # If parallel, parallelize the GD module if config['parallel']: GD = nn.DataParallel(GD) if config['cross_replica']: patch_replication_callback(GD) # Prepare loggers for stats; metrics holds test metrics, # lmetrics holds any desired training metrics. 
test_metrics_fname = '%s/%s_log.jsonl' % (config['logs_root'], experiment_name) train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name) print('Inception Metrics will be saved to {}'.format(test_metrics_fname)) test_log = utils.MetricsLogger(test_metrics_fname, reinitialize=(not config['resume'])) print('Training Metrics will be saved to {}'.format(train_metrics_fname)) train_log = utils.MyLogger(train_metrics_fname, reinitialize=(not config['resume']), logstyle=config['logstyle']) # Write metadata utils.write_metadata(config['logs_root'], experiment_name, config, state_dict) # Prepare data; the Discriminator's batch size is all that needs to be passed # to the dataloader, as G doesn't require dataloading. # Note that at every loader iteration we pass in enough data to complete # a full D iteration (regardless of number of D steps and accumulations) D_batch_size = (config['batch_size'] * config['num_D_steps'] * config['num_D_accumulations']) loaders = utils.get_data_loaders(**{**config, 'batch_size': D_batch_size, 'start_itr': state_dict['itr']}) # Prepare inception metrics: FID and IS if not config['on_kaggle']: get_inception_metrics = inception_utils.prepare_inception_metrics(config['base_root'],config['dataset'], config['parallel'], config['no_fid']) # Prepare noise and randomly sampled label arrays # Allow for different batch sizes in G G_batch_size = max(config['G_batch_size'], config['batch_size']) if config['use_dog_cnt']: y_dist='categorical_dog_cnt' else: y_dist = 'categorical' dim_z=G.dim_z*2 if config['mix_style'] else G.dim_z z_, y_ = utils.prepare_z_y(G_batch_size, dim_z, config['n_classes'], device=device, fp16=config['G_fp16'],z_dist=config['z_dist'], threshold=config['truncated_threshold'],y_dist=y_dist) # Prepare a fixed z & y to see individual sample evolution throghout training fixed_z, fixed_y = utils.prepare_z_y(G_batch_size, dim_z, config['n_classes'], device=device, fp16=config['G_fp16'],z_dist=config['z_dist'], threshold=config['truncated_threshold'],y_dist=y_dist) fixed_z.sample_() fixed_y.sample_() # Loaders are loaded, prepare the training function if config['which_train_fn'] == 'GAN': train = train_fns.GAN_training_function(G, D, GD, z_, y_, ema, state_dict, config) # Else, assume debugging and use the dummy train fn else: train = train_fns.dummy_training_function() # Prepare Sample function for use with inception metrics sample = functools.partial(utils.sample, G=(G_ema if config['ema'] and config['use_ema'] else G), z_=z_, y_=y_, config=config) print('Beginning training at epoch %d...' % state_dict['epoch']) #I find by epoch is more convelient,so I suggest change to it.if save_every<100,I will change to py epoch by_epoch=False if config['save_every']>100 else True # Train for specified number of epochs, although we mostly track G iterations. start_time = time.time() for epoch in range(state_dict['epoch'], config['num_epochs']): # Which progressbar to use? TQDM or my own? if config['on_kaggle']: pbar = loaders[0] elif config['pbar'] == 'mine': pbar = utils.progress(loaders[0],displaytype='s1k' if config['use_multiepoch_sampler'] else 'eta') else: pbar = tqdm(loaders[0]) epoch_start_time = time.time() for i, (x, y) in enumerate(pbar): # Increment the iteration counter state_dict['itr'] += 1 # Make sure G and D are in training mode, just in case they got set to eval # For D, which typically doesn't have BN, this shouldn't matter much. 
G.train() D.train() if config['ema']: G_ema.train() if type(y) == list or type(y)==tuple: y=torch.cat([yi.unsqueeze(1) for yi in y],dim=1) if config['D_fp16']: x, y = x.to(device).half(), y.to(device) else: x, y = x.to(device), y.to(device) metrics = train(x, y) train_log.log(itr=int(state_dict['itr']), **metrics) # Every sv_log_interval, log singular values if (config['sv_log_interval'] > 0) and (not (state_dict['itr'] % config['sv_log_interval'])): train_log.log(itr=int(state_dict['itr']), **{**utils.get_SVs(G, 'G'), **utils.get_SVs(D, 'D')}) # If using my progbar, print metrics. if config['on_kaggle']: if i == len(loaders[0])-1: metrics_str = ', '.join(['%s : %+4.3f' % (key, metrics[key]) for key in metrics]) epoch_time = (time.time()-epoch_start_time) / 60 total_time = (time.time()-start_time) / 60 print(f"[{epoch+1}/{config['num_epochs']}][{epoch_time:.1f}min/{total_time:.1f}min] {metrics_str}") elif config['pbar'] == 'mine': if D.lr_sched is None: print(', '.join(['epoch:%d' % (epoch+1),'itr: %d' % state_dict['itr']] + ['%s : %+4.3f' % (key, metrics[key]) for key in metrics]), end=' ') else: print(', '.join(['epoch:%d' % (epoch+1),'lr:%.5f' % D.lr_sched.get_lr()[0] ,'itr: %d' % state_dict['itr']] + ['%s : %+4.3f' % (key, metrics[key]) for key in metrics]), end=' ') if not by_epoch: # Save weights and copies as configured at specified interval if not (state_dict['itr'] % config['save_every']) and not config['on_kaggle']: if config['G_eval_mode']: print('Switchin G to eval mode...') G.eval() if config['ema']: G_ema.eval() train_fns.save_and_sample(G, D, G_ema, z_, y_, fixed_z, fixed_y, state_dict, config, experiment_name) # Test every specified interval if not (state_dict['itr'] % config['test_every']) and not config['on_kaggle']: if config['G_eval_mode']: print('Switchin G to eval mode...') G.eval() train_fns.test(G, D, G_ema, z_, y_, state_dict, config, sample, get_inception_metrics, experiment_name, test_log) if by_epoch: # Save weights and copies as configured at specified interval if not ((epoch+1) % config['save_every']) and not config['on_kaggle']: if config['G_eval_mode']: print('Switchin G to eval mode...') G.eval() if config['ema']: G_ema.eval() train_fns.save_and_sample(G, D, G_ema, z_, y_, fixed_z, fixed_y, state_dict, config, experiment_name) # Test every specified interval if not ((epoch+1) % config['test_every']) and not config['on_kaggle']: if config['G_eval_mode']: print('Switchin G to eval mode...') G.eval() train_fns.test(G, D, G_ema, z_, y_, state_dict, config, sample, get_inception_metrics, experiment_name, test_log) if G_ema is not None and (epoch+1) % config['test_every'] == 0 and not config['on_kaggle']: torch.save(G_ema.state_dict(), '%s/%s/G_ema_epoch_%03d.pth' % (config['weights_root'], config['experiment_name'], epoch+1)) # Increment epoch counter at end of epoch state_dict['epoch'] += 1 if G.lr_sched is not None: G.lr_sched.step() if D.lr_sched is not None: D.lr_sched.step() if config['on_kaggle']: train_fns.generate_submission(sample, config, experiment_name)
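# The script above samples z with a configurable z_dist and truncated_threshold, i.e. the truncation trick:
# restricting latents to [-t, t] trades sample diversity for fidelity. A resampling-based sketch is shown
# below; the repo's utils.prepare_z_y may instead rely on scipy's truncnorm or a simple scaling of z.
import torch

def truncated_normal_sketch(batch_size, dim_z, threshold=1.0, device='cuda'):
    z = torch.randn(batch_size, dim_z, device=device)
    if threshold > 0:
        mask = z.abs() > threshold
        while mask.any():
            # Redraw only the out-of-range entries until everything lies inside [-threshold, threshold].
            z[mask] = torch.randn(int(mask.sum()), device=device)
            mask = z.abs() > threshold
    return z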
def run(config): # Update the config dict as necessary # This is for convenience, to add settings derived from the user-specified # configuration into the config-dict (e.g. inferring the number of classes # and size of the images from the dataset, passing in a pytorch object # for the activation specified as a string) config['resolution'] = utils.imsize_dict[config['dataset']] config['n_classes'] = utils.nclass_dict[config['dataset']] config['G_activation'] = utils.activation_dict[config['G_nl']] config['D_activation'] = utils.activation_dict[config['D_nl']] # By default, skip init if resuming training. if config['resume']: print('Skipping initialization for training resumption...') config['skip_init'] = True config = utils.update_config_roots(config) device = 'cpu' # Seed RNG utils.seed_rng(config['seed']) # Prepare root folders if necessary utils.prepare_root(config) # Setup cudnn.benchmark for free speed torch.backends.cudnn.benchmark = True # Import the model--this line allows us to dynamically select different files. model = __import__(config['model']) experiment_name = (config['experiment_name'] if config['experiment_name'] else utils.name_from_config(config)) experiment_name = "test_{}".format(experiment_name) print('Experiment name is %s' % experiment_name) # Next, build the model G = model.Generator(**config).to(device) D = model.Discriminator(**config).to(device) E = model.ImgEncoder(**config).to(device) # If using EMA, prepare it if config['ema']: print('Preparing EMA for G with decay of {}'.format( config['ema_decay'])) G_ema = model.Generator(**{ **config, 'skip_init': True, 'no_optim': True }).to(device) ema = utils.ema(G, G_ema, config['ema_decay'], config['ema_start']) else: G_ema, ema = None, None # FP16? if config['G_fp16']: print('Casting G to float16...') G = G.half() if config['ema']: G_ema = G_ema.half() if config['D_fp16']: print('Casting D to fp16...') D = D.half() # Consider automatically reducing SN_eps? 
GDE = model.G_D_E(G, D, E) # print(G) # print(D) # print(E) print("Model Created!") print('Number of params in G: {} D: {} E: {}'.format(*[ sum([p.data.nelement() for p in net.parameters()]) for net in [G, D, E] ])) # Prepare state dict, which holds things like epoch # and itr # state_dict = { 'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0, 'best_IS': 0, 'best_FID': 999999, 'config': config } # If loading from a pre-trained model, load weights print('Loading weights...') utils.load_weights( G, D, E, state_dict, config['weights_root'], config['load_experiment_name'], config['load_weights'] if config['load_weights'] else None, G_ema if config['ema'] else None) state_dict = { 'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0, 'best_IS': 0, 'best_FID': 999999, 'config': config } # If parallel, parallelize the GD module if config['parallel']: GDE = nn.DataParallel(GDE) if config['cross_replica']: patch_replication_callback(GDE) G_batch_size = max(config['G_batch_size'], config['batch_size']) D_batch_size = (config['batch_size'] * config['num_D_steps'] * config['num_D_accumulations']) loaders, train_dataset = utils.get_data_loaders(**{ **config, 'batch_size': D_batch_size, 'start_itr': 0 }) z_, y_ = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) # Prepare a fixed z & y to see individual sample evolution throghout training fixed_z, fixed_y = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) fixed_z.sample_() fixed_y.sample_() print("fixed_y original: {} {}".format(fixed_y.shape, fixed_y[:10])) fixed_x, fixed_y_of_x = utils.prepare_x_y(G_batch_size, train_dataset, experiment_name, config) # Prepare Sample function for use with inception metrics sample = functools.partial( utils.sample, G=(G_ema if config['ema'] and config['use_ema'] else G), z_=z_, y_=y_, config=config) G.eval() E.eval() print("check1 -------------------------------") print("state_dict['itr']", state_dict['itr']) if config['pbar'] == 'mine': pbar = utils.progress( loaders[0], displaytype='s1k' if config['use_multiepoch_sampler'] else 'eta') else: pbar = tqdm(loaders[0]) print("state_dict['itr']", state_dict['itr']) for i, (x, y) in enumerate(pbar): state_dict['itr'] += 1 if config['D_fp16']: x, y = x.to(device).half(), y.to(device) else: x, y = x.to(device), y.to(device) print("x.shape", x.shape) print("y.shape", y.shape) activation_extract(G, D, E, G_ema, x, y, z_, y_, state_dict, config, experiment_name, save_weights=False) if state_dict['itr'] == 20: break
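# The test script above repeatedly calls activation_extract(...) on small batches. A common way to pull
# intermediate activations out of fixed PyTorch models is with forward hooks; the helper below is a
# hypothetical illustration of that pattern, not the repo's activation_extract.
import torch

def collect_activations_sketch(model, x, layer_names):
    # Run one forward pass and return the outputs of the named submodules.
    acts, handles = {}, []

    def make_hook(name):
        def hook(module, inputs, output):
            acts[name] = output.detach()
        return hook

    named = dict(model.named_modules())
    for name in layer_names:
        handles.append(named[name].register_forward_hook(make_hook(name)))
    with torch.no_grad():
        model(x)
    for h in handles:
        h.remove()  # always detach the hooks so later passes are unaffected
    return acts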
def run(config): # Update the config dict as necessary # This is for convenience, to add settings derived from the user-specified # configuration into the config-dict (e.g. inferring the number of classes # and size of the images from the dataset, passing in a pytorch object # for the activation specified as a string) ## *** Added: resolution comes from the I128_hdf5 dataset; we may need to use the C10 dataset here config['resolution'] = utils.imsize_dict[config['dataset']] ## *** Added: nclass_dict loads the I128_hdf5 class count; here we may need the 10 classes of C10 config['n_classes'] = utils.nclass_dict[config['dataset']] ## Load the activation functions for G and D, both ReLU; 'relu' is lowercase here, unclear whether it should be capitalized config['G_activation'] = utils.activation_dict[config['G_nl']] config['D_activation'] = utils.activation_dict[config['D_nl']] ## Training from scratch, there are no saved parameters, so no change needed; the default is fine # By default, skip init if resuming training. if config['resume']: print('Skipping initialization for training resumption...') config['skip_init'] = True ## Log setup; this probably does not need changing either config = utils.update_config_roots(config) device = 'cuda' # Seed RNG ## Set the initial RNG seed (all 0); *** needs to be changed to Paddle's seeding utils.seed_rng(config['seed']) # Prepare root folders if necessary ## Set the log root directories; this should not need changing either utils.prepare_root(config) # Setup cudnn.benchmark for free speed ## @@@ No change needed here, just comment it out; Paddle may not need this setting ## It speeds up networks whose architecture is fixed # torch.backends.cudnn.benchmark = True # Import the model--this line allows us to dynamically select different files. ## *** !!! Neat trick: this imports the BigGAN model directly; check the network architecture configuration inside BigGAN model = __import__(config['model']) ## No change needed; a series of config values is packed into the experiment name experiment_name = (config['experiment_name'] if config['experiment_name'] else utils.name_from_config(config)) print('Experiment name is %s' % experiment_name) # Next, build the model ## *** Loads the parameters; two methods need to be modified G = model.Generator(**config).to(device) D = model.Discriminator(**config).to(device) # If using EMA, prepare it ## *** Off by default; the EMA part can be left unchanged for now if config['ema']: print('Preparing EMA for G with decay of {}'.format( config['ema_decay'])) G_ema = model.Generator(**{ **config, 'skip_init': True, 'no_optim': True }).to(device) ema = utils.ema(G, G_ema, config['ema_decay'], config['ema_start']) else: G_ema, ema = None, None # FP16? ## C10 is fairly small, so this G/D part can also stay unchanged for now and use the default precision if config['G_fp16']: print('Casting G to float16...') G = G.half() if config['ema']: G_ema = G_ema.half() if config['D_fp16']: print('Casting D to fp16...') D = D.half() # Consider automatically reducing SN_eps? ## Wrap the configured G and D into the combined G_D module GD = model.G_D(G, D) ## *** These two prints could probably be removed, they are not necessary; they come from printing attributes inherited from nn.Module print(G) print(D) ## *** parameters() is also an attribute inherited from torch print('Number of params in G: {} D: {}'.format( * [sum([p.data.nelement() for p in net.parameters()]) for net in [G, D]])) # Prepare state dict, which holds things like epoch # and itr # ## Initialize the bookkeeping state dict; no change needed state_dict = { 'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0, 'best_IS': 0, 'best_FID': 999999, 'config': config } # If loading from a pre-trained model, load weights ## Not using a pre-trained model for now, so this block needs no change if config['resume']: print('Loading weights...') utils.load_weights( G, D, state_dict, config['weights_root'], experiment_name, config['load_weights'] if config['load_weights'] else None, G_ema if config['ema'] else None) # If parallel, parallelize the GD module ## Can be ignored for now; GD is not parallelized by default if config['parallel']: GD = nn.DataParallel(GD) if config['cross_replica']: patch_replication_callback(GD) ## Logging hub; probably also fine to leave alone. If needed, check whether the IS and FID results can be extracted from it # Prepare loggers for stats; metrics holds test metrics, # lmetrics holds any desired training metrics. 
test_metrics_fname = '%s/%s_log.jsonl' % (config['logs_root'], experiment_name) train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name) print('Inception Metrics will be saved to {}'.format(test_metrics_fname)) test_log = utils.MetricsLogger(test_metrics_fname, reinitialize=(not config['resume'])) print('Training Metrics will be saved to {}'.format(train_metrics_fname)) train_log = utils.MyLogger(train_metrics_fname, reinitialize=(not config['resume']), logstyle=config['logstyle']) ## This is the important one; it is used to record the results. # Write metadata utils.write_metadata(config['logs_root'], experiment_name, config, state_dict) ## *** Data loading for D; get_data_loaders uses torchvision transforms while loading # Prepare data; the Discriminator's batch size is all that needs to be passed # to the dataloader, as G doesn't require dataloading. # Note that at every loader iteration we pass in enough data to complete # a full D iteration (regardless of number of D steps and accumulations) D_batch_size = (config['batch_size'] * config['num_D_steps'] * config['num_D_accumulations']) loaders = utils.get_data_loaders(**{ **config, 'batch_size': D_batch_size, 'start_itr': state_dict['itr'] }) ## Prepare the evaluation metrics: the FID and IS pipelines can be computed with the numpy version, no change needed # Prepare inception metrics: FID and IS get_inception_metrics = inception_utils.prepare_inception_metrics( config['dataset'], config['parallel'], config['no_fid']) ## Prepare the noise and the randomly sampled label arrays # Prepare noise and randomly sampled label arrays # Allow for different batch sizes in G G_batch_size = max(config['G_batch_size'], config['batch_size']) ## *** Some torch/numpy usage here needs to be adapted; this obtains the noise and labels z_, y_ = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) # Prepare a fixed z & y to see individual sample evolution throughout training ## *** Some torch/numpy usage here needs to be adapted; this obtains the noise and labels ## TODO Two sets of noise and labels are obtained here -- what is the purpose? fixed_z, fixed_y = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) ## *** Sampling methods come from Distribution; Gaussian or categorical sampling can be chosen fixed_z.sample_() fixed_y.sample_() # Loaders are loaded, prepare the training function ## *** Instantiate the GAN_training_function training procedure if config['which_train_fn'] == 'GAN': train = train_fns.GAN_training_function(G, D, GD, z_, y_, ema, state_dict, config) # Else, assume debugging and use the dummy train fn ## If no training function is specified, run a dummy training pass for debugging else: train = train_fns.dummy_training_function() # Prepare Sample function for use with inception metrics ## *** Pre-bind some of the arguments of utils.sample and define it as the new function sample sample = functools.partial( utils.sample, G=(G_ema if config['ema'] and config['use_ema'] else G), z_=z_, y_=y_, config=config) print('Beginning training at epoch %d...' % state_dict['epoch']) # Train for specified number of epochs, although we mostly track G iterations. for epoch in range(state_dict['epoch'], config['num_epochs']): # Which progressbar to use? TQDM or my own? if config['pbar'] == 'mine': ## This part needs no porting ## !!! loaders[0] is the data sampling object pbar = utils.progress(loaders[0], displaytype='s1k' if config['use_multiepoch_sampler'] else 'eta') else: pbar = tqdm(loaders[0]) for i, (x, y) in enumerate(pbar): # Increment the iteration counter state_dict['itr'] += 1 # Make sure G and D are in training mode, just in case they got set to eval # For D, which typically doesn't have BN, this shouldn't matter much. 
## *** train() is inherited from nn.Module; the corresponding calls are G.train() D.train() if config['ema']: G_ema.train() if config['D_fp16']: x, y = x.to(device).half(), y.to(device) else: x, y = x.to(device), y.to(device) ## *** Feed the data and labels into the training function; train itself has a lot that needs rewriting metrics = train(x, y) ## Log the results: all metric values go into the training log train_log.log(itr=int(state_dict['itr']), **metrics) # Every sv_log_interval, log singular values ## Log the changes in the singular values if (config['sv_log_interval'] > 0) and ( not (state_dict['itr'] % config['sv_log_interval'])): train_log.log(itr=int(state_dict['itr']), **{ **utils.get_SVs(G, 'G'), **utils.get_SVs(D, 'D') }) # If using my progbar, print metrics. if config['pbar'] == 'mine': print(', '.join( ['itr: %d' % state_dict['itr']] + ['%s : %+4.3f' % (key, metrics[key]) for key in metrics]), end=' ') # Save weights and copies as configured at specified interval ## By default, results are saved every 2000 steps if not (state_dict['itr'] % config['save_every']): if config['G_eval_mode']: print('Switching G to eval mode...') ## *** a method from nn.Module G.eval() ## If using the exponential moving average if config['ema']: G_ema.eval() train_fns.save_and_sample(G, D, G_ema, z_, y_, fixed_z, fixed_y, state_dict, config, experiment_name) # Test every specified interval ## By default, a test runs every 5000 steps if not (state_dict['itr'] % config['test_every']): if config['G_eval_mode']: print('Switching G to eval mode...') G.eval() train_fns.test(G, D, G_ema, z_, y_, state_dict, config, sample, get_inception_metrics, experiment_name, test_log) # Increment epoch counter at end of epoch state_dict['epoch'] += 1
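# NOTE: a minimal sketch of the weight-EMA pattern that utils.ema(G, G_ema, ema_decay, ema_start)
# is used for above (copy the source weights once, then target = decay * target + (1 - decay) * source
# after each G step). This is an assumption about the usual implementation, not the repo's utils.ema;
# the class and parameter names below are illustrative only.
import torch


class SimpleEMA(object):
    def __init__(self, source, target, decay=0.9999, start_itr=0):
        self.source = source
        self.target = target
        self.decay = decay
        self.start_itr = start_itr
        # Start the averaged copy from the source weights so early updates are well defined.
        with torch.no_grad():
            for s_p, t_p in zip(source.parameters(), target.parameters()):
                t_p.copy_(s_p)

    def update(self, itr=None):
        # Before start_itr, track the source directly (decay 0); afterwards, average slowly.
        decay = self.decay if (itr is None or itr >= self.start_itr) else 0.0
        with torch.no_grad():
            for s_p, t_p in zip(self.source.parameters(), self.target.parameters()):
                t_p.mul_(decay).add_(s_p, alpha=1.0 - decay)
# In a loop shaped like the one above, the update would be called once per iteration,
# e.g. ema.update(state_dict['itr']) after the generator step.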
def run(config): # Update the config dict as necessary # This is for convenience, to add settings derived from the user-specified # configuration into the config-dict (e.g. inferring the number of classes # and size of the images from the dataset, passing in a pytorch object # for the activation specified as a string) config['resolution'] = utils.imsize_dict[config['dataset']] config['n_classes'] = utils.nclass_dict[config['dataset']] config['G_activation'] = utils.activation_dict[config['G_nl']] config['D_activation'] = utils.activation_dict[config['D_nl']] # By default, skip init if resuming training. if config['resume']: print('Skipping initialization for training resumption...') config['skip_init'] = True config = vae_utils.update_config_roots(config) device = 'cuda' # Seed RNG utils.seed_rng(config['seed']) # Prepare root folders if necessary utils.prepare_root(config) # Setup cudnn.benchmark for free speed torch.backends.cudnn.benchmark = True # Import the model--this line allows us to dynamically select different files. experiment_name = (config['experiment_name'] if config['experiment_name'] else utils.name_from_config(config)) print('Experiment name is %s' % experiment_name) # Next, build the model G = Generator(**config).to(device) # If using EMA, prepare it if config['ema']: print('Preparing EMA for G with decay of {}'.format( config['ema_decay'])) G_ema = Generator(**{ **config, 'skip_init': True, 'no_optim': True }).to(device) ema = utils.ema(G, G_ema, config['ema_decay'], config['ema_start']) else: G_ema, ema = None, None print(G) print('Number of params in E: {}'.format( *[sum([p.data.nelement() for p in net.parameters()]) for net in [G]])) # Prepare state dict, which holds things like epoch # and itr # state_dict = { 'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0, 'best_IS': 0, 'best_FID': 999999, 'config': config, 'best_precise': 0.0 } # If loading from a pre-trained model, load weights if config['resume']: print('Loading weights...') vae_utils.load_weights( [G], state_dict, config['weights_root'], experiment_name, config['load_weights'] if config['load_weights'] else None, [G_ema] if config['ema'] else [None]) class Wrapper(nn.Module): def __init__(self): super(Wrapper, self).__init__() self.G = G def forward(self, w, y): x = self.G(w, self.G.shared(y)) return x W = Wrapper() # If parallel, parallelize the GD module if config['parallel']: W = nn.DataParallel(W) if config['cross_replica']: patch_replication_callback(W) # Prepare loggers for stats; metrics holds test metrics, # lmetrics holds any desired training metrics. 
test_metrics_fname = '%s/%s_log.jsonl' % (config['logs_root'], experiment_name) train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name) print('Inception Metrics will be saved to {}'.format(test_metrics_fname)) test_log = utils.MetricsLogger(test_metrics_fname, reinitialize=(not config['resume'])) print('Training Metrics will be saved to {}'.format(train_metrics_fname)) train_log = utils.MyLogger(train_metrics_fname, reinitialize=(not config['resume']), logstyle=config['logstyle']) # Write metadata utils.write_metadata(config['logs_root'], experiment_name, config, state_dict) get_inception_metrics = inception_utils.prepare_inception_metrics( config['dataset'], config['parallel'], config['data_root'], config['no_fid']) z_, y_ = utils.prepare_z_y(config['batch_size'], G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) fixed_w, fixed_y = utils.prepare_z_y(config['batch_size'], G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) fixed_w.sample_() fixed_y.sample_() G_scheduler = torch.optim.lr_scheduler.StepLR(G.optim, step_size=50, gamma=0.1) MSE = torch.nn.MSELoss(reduction='mean') def train(w, img): y_.sample_() G.optim.zero_grad() x = W(w, y_) loss = MSE(x, img) loss.backward() if config['E_ortho'] > 0.0: # Debug print to indicate we're using ortho reg in D. print('using modified ortho reg in E') utils.ortho(G, config['G_ortho']) G.optim.step() out = {' loss': float(loss.item())} if config['ema']: ema.update(state_dict['itr']) del loss, x return out class Embed(nn.Module): def __init__(self): super(Embed, self).__init__() embed = np.load('/ghome/fengrl/home/FGAN/embed_ema.npy') self.dense = nn.Linear(120, 120, bias=False) self.embed = torch.tensor(embed, requires_grad=False) self.dense.load_state_dict({'weight': self.embed}) for param in self.dense.parameters(): param.requires_grad = False def forward(self, z): z = self.dense(z) return z embedding = Embed().to(device) fixed_w = embedding(fixed_w) sample = functools.partial( sample_with_embed, embed=embedding, G=(G_ema if config['ema'] and config['use_ema'] else G), z_=z_, y_=y_, config=config) batch_size = config['batch_size'] * config['num_D_steps'] * config[ 'num_D_accumulations'] loader = sampled_ssgan.get_SSGAN_sample_loader( **{ **config, 'batch_size': batch_size, 'start_itr': state_dict['itr'], 'is_slice': False }) print('Beginning training at epoch %d...' % state_dict['epoch']) # Train for specified number of epochs, although we mostly track G iterations. for epoch in range(state_dict['epoch'], config['num_epochs']): # Which progressbar to use? TQDM or my own? if config['pbar'] == 'mine': pbar = utils.progress(loader, displaytype='eta') else: pbar = tqdm(loader) for i, (img, z, w) in enumerate(pbar): # Increment the iteration counter state_dict['itr'] += 1 # Make sure G and D are in training mode, just in case they got set to eval # For D, which typically doesn't have BN, this shouldn't matter much. G.train() if config['ema']: G_ema.train() img, w = img.to(device), w.to(device) img = torch.split(img, config['batch_size']) w = torch.split(w, config['batch_size']) counter = 0 metrics = train(w[counter], img[counter]) counter += 1 del img, w train_log.log(itr=int(state_dict['itr']), **metrics) # Every sv_log_interval, log singular values if (config['sv_log_interval'] > 0) and ( not (state_dict['itr'] % config['sv_log_interval'])): train_log.log(itr=int(state_dict['itr']), **{**utils.get_SVs(G, 'G')}) # If using my progbar, print metrics. 
if config['pbar'] == 'mine': print(', '.join( ['itr: %d' % state_dict['itr']] + ['%s : %+4.3f' % (key, metrics[key]) for key in metrics]), end=' ') # Save weights and copies as configured at specified interval if not (state_dict['itr'] % config['save_every']): if config['G_eval_mode']: print('Switching G to eval mode...') G.eval() if config['ema']: G_ema.eval() train_fns.save_and_sample(G, None, G_ema, z_, y_, fixed_w, fixed_y, state_dict, config, experiment_name) # Test every specified interval if not (state_dict['itr'] % config['test_every']): if config['G_eval_mode']: print('Switching G to eval mode...') G.eval() train_fns.test(G, None, G_ema, z_, y_, state_dict, config, sample, get_inception_metrics, experiment_name, test_log) # Increment epoch counter at end of epoch state_dict['epoch'] += 1 G_scheduler.step()
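# NOTE: the Embed module in the function above loads a pre-computed 120x120 matrix into an
# nn.Linear(bias=False) and freezes it, so the mapping applied to w never changes during training.
# Below is a minimal sketch of that freeze-a-linear-layer pattern, assuming a weight matrix is
# already available; the class name is illustrative and an identity matrix stands in for the
# saved embed_ema.npy file.
import numpy as np
import torch
import torch.nn as nn


class FrozenLinear(nn.Module):
    def __init__(self, weight_matrix):
        super(FrozenLinear, self).__init__()
        out_dim, in_dim = weight_matrix.shape
        self.dense = nn.Linear(in_dim, out_dim, bias=False)
        # Load the fixed weights, then detach the layer from gradient updates.
        with torch.no_grad():
            self.dense.weight.copy_(torch.as_tensor(weight_matrix, dtype=torch.float32))
        for param in self.dense.parameters():
            param.requires_grad = False

    def forward(self, z):
        return self.dense(z)


embedding_demo = FrozenLinear(np.eye(120, dtype=np.float32))  # stand-in for the stored embedding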
def run(config): if config["G_path"] is None: # Download a pre-trained G if necessary download_G() config["G_path"] = f'checkpoints/138k' G, state_dict, device, experiment_name = load_G(config) # If parallel, parallelize the GD module if config['parallel']: G = nn.DataParallel(G) if config['cross_replica']: patch_replication_callback(G) pad = get_direction_padding_fn(config) ndirs = config["ndirs"] if config["directions_to_vis"] is None else len( config["directions_to_vis"]) path_sizes = torch.tensor([config["path_size"]] * ndirs, dtype=torch.float32) interp_z, interp_y = utils.prepare_z_y(config["n_samples"], G.module.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) interp_z.sample_() interp_y.sample_() if config['fix_class'] is not None: interp_y = interp_y.new_full(interp_y.size(), config['fix_class']) interp_y_ = G.module.shared(interp_y) direction_size = config["dim_z"] if config[ "search_space"] == "all" else config["ndirs"] if config['load_A'] == 'random': print('Visualizing RANDOM directions') A = torch.randn(ndirs, direction_size) A_name = 'random' nn.init.kaiming_normal_(A) elif config['load_A'] == 'coord': print('Visualizing COORDINATE directions') A = torch.eye(ndirs, direction_size) A_name = 'coord' else: print('Visualizing PRE-TRAINED directions') A = torch.load(config["load_A"]) A_name = 'pretrained' A = A.cuda() Q = pad(fast_gram_schmidt(A)) if not config["no_ortho"] else pad(A) visuals_dir = f'visuals/{experiment_name}/{A_name}' os.makedirs(visuals_dir, exist_ok=True) print('Generating interpolation videos...') visualize_directions(G, interp_z, interp_y_, path_sizes=path_sizes, Q=Q, base_path=visuals_dir, interp_steps=180, interp_mode='smooth_center', high_quality=True, quiet=False, minibatch_size=config["val_minibatch_size"], directions_to_vis=config["directions_to_vis"])
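# NOTE: `Q = pad(fast_gram_schmidt(A))` in the function above orthogonalizes the rows of the
# direction matrix A before visualization, so each direction is explored independently of the
# others. The repo's fast_gram_schmidt is not reproduced here; the function below is only a plain
# Gram-Schmidt sketch over the rows of A, under the assumption of the classical algorithm.
import torch


def gram_schmidt_rows(A, eps=1e-8):
    ortho_rows = []
    for v in A:
        w = v.clone()
        for u in ortho_rows:
            w = w - torch.dot(w, u) * u          # remove the component along each earlier direction
        ortho_rows.append(w / (w.norm() + eps))  # normalize the remainder
    return torch.stack(ortho_rows)


# The rows of the result are orthonormal up to numerical error:
Q_demo = gram_schmidt_rows(torch.randn(4, 16))
print(torch.allclose(Q_demo @ Q_demo.t(), torch.eye(4), atol=1e-5))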
def run(config): # Update the config dict as necessary # This is for convenience, to add settings derived from the user-specified # configuration into the config-dict (e.g. inferring the number of classes # and size of the images from the dataset, passing in a pytorch object # for the activation specified as a string) config['resolution'] = utils.imsize_dict[config['dataset']] config['n_classes'] = utils.nclass_dict[config['dataset']] config['G_activation'] = utils.activation_dict[config['G_nl']] config['D_activation'] = utils.activation_dict[config['D_nl']] # By default, skip init if resuming training. if config['resume']: print('Skipping initialization for training resumption...') config['skip_init'] = True config = vae_utils.update_config_roots(config) device = 'cuda' # Seed RNG utils.seed_rng(config['seed']) # Prepare root folders if necessary utils.prepare_root(config) # Setup cudnn.benchmark for free speed torch.backends.cudnn.benchmark = True # Import the model--this line allows us to dynamically select different files. model = import_module('Network.' + config['model']) experiment_name = (config['experiment_name'] if config['experiment_name'] else utils.name_from_config(config)) print('Experiment name is %s' % experiment_name) # Next, build the model G = model.Generator(**config).to(device) D = model.Discriminator(**config).to(device) L = model.LatentBinder(**config).to(device) I = Invert.Invert(**config).to(device) E = Encoder.Encoder(**config).to(device) Decoder = model.Decoder(I, E, G, D, L).to(device) # If using EMA, prepare it if config['ema']: print('Preparing EMA for G with decay of {}'.format( config['ema_decay'])) G_ema = model.Generator(name='G_ema', **{ **config, 'skip_init': True, 'no_optim': True }).to(device) gema = utils.ema(G, G_ema, config['ema_decay'], config['ema_start']) print('Preparing EMA for Invert with decay of {}'.format( config['ema_decay'])) I_ema = Invert.Invert(name='Invert_ema', **{ **config, 'skip_init': True, 'no_optim': True }).to(device) iema = utils.ema(I, I_ema, config['ema_decay'], config['ema_start']) print('Preparing EMA for Encoder with decay of {}'.format( config['ema_decay'])) E_ema = Encoder.Encoder(name='Encoder_ema', **{ **config, 'skip_init': True, 'no_optim': True }).to(device) eema = utils.ema(E, E_ema, config['ema_decay'], config['ema_start']) else: G_ema, gema, I_ema, iema, E_ema, eema = None, None, None, None, None, None # FP16? We should also halve the other components of the Decoder, but since we will not use FP16 we simply # do not implement this. if config['G_fp16']: print('Casting G to float16...') G = G.half() if config['ema']: G_ema = G_ema.half() if config['D_fp16']: print('Casting D to fp16...') D = D.half() # Consider automatically reducing SN_eps?
print(G) print(D) print(I) print(E) print(L) print( 'Number of params in G: {} D: {} Invert: {} Encoder: {} LatentBinder: {}' .format(*[ sum([p.data.nelement() for p in net.parameters()]) for net in [G, D, I, E, L] ])) # Prepare state dict, which holds things like epoch # and itr # state_dict = { 'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0, 'best_IS': 0, 'best_FID': 999999, 'config': config } # If loading from a pre-trained model, load weights if config['resume']: print('Loading weights...') vae_utils.load_weights( [G, D, I, E, L], state_dict, config['weights_root'], experiment_name, config['load_weights'] if config['load_weights'] else None, [G_ema, I_ema, E_ema] if config['ema'] else None) # If parallel, parallelize the Decoder module if config['parallel']: # Decoder = nn.DataParallel(Decoder) # Using custom dataparallel to save GPU memory Decoder = parallel_utils.DataParallelModel(Decoder) if config['cross_replica']: patch_replication_callback(Decoder) # Prepare loggers for stats; metrics holds test metrics, # lmetrics holds any desired training metrics. test_metrics_fname = '%s/%s_log.jsonl' % (config['logs_root'], experiment_name) train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name) print('Inception Metrics will be saved to {}'.format(test_metrics_fname)) test_log = utils.MetricsLogger(test_metrics_fname, reinitialize=(not config['resume'])) print('Training Metrics will be saved to {}'.format(train_metrics_fname)) train_log = utils.MyLogger(train_metrics_fname, reinitialize=(not config['resume']), logstyle=config['logstyle']) # Write metadata utils.write_metadata(config['logs_root'], experiment_name, config, state_dict) # Prepare data; the Discriminator's batch size is all that needs to be passed # to the dataloader, as G doesn't require dataloading. # Note that at every loader iteration we pass in enough data to complete # a full D iteration (regardless of number of D steps and accumulations) D_batch_size = (config['batch_size'] * config['num_D_steps'] * config['num_D_accumulations']) loaders = utils.get_data_loaders(**{ **config, 'batch_size': D_batch_size, 'start_itr': state_dict['itr'] }) # Prepare inception metrics: FID and IS get_inception_metrics = inception_utils.prepare_inception_metrics( config['dataset'], config['parallel'], config['data_root'], config['no_fid']) # Prepare vgg for recon_loss; since the loss is computed in parallel, there is no need for vgg itself to be parallel # vgg is pretrained on imagenet, so we cannot use it. # vgg = load_vgg_from_local(parallel=False) # Prepare KNN for evaluating encoder. KNN = vae_utils.KNN(loaders[0], anchor_num=10, K=4) # Prepare noise and randomly sampled label arrays # Allow for different batch sizes in G G_batch_size = max(config['G_batch_size'], config['batch_size']) z_, y_ = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) # Prepare fake labels for encoder.
_, ey_ = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) # Prepare a fixed z & y to see individual sample evolution throughout training fixed_z, fixed_y = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) fixed_x, _ = vae_utils.prepare_fixed_x(loaders[0], G_batch_size, config, experiment_name, device) fixed_z.sample_() fixed_y.sample_() # Loaders are loaded, prepare the training function if config['which_train_fn'] == 'GAN': # train = train_vae_fns.VAE_training_function(G, D, E, I, L, Decoder, z_, y_, ey_, # [gema, iema, eema], state_dict, vgg, config) train = train_vae_fns.parallel_training_function( G, D, E, I, L, Decoder, z_, y_, ey_, [gema, iema, eema], state_dict, config) # Else, assume debugging and use the dummy train fn else: train = train_vae_fns.dummy_training_function() # Prepare Sample function for use with inception metrics sample = functools.partial( vae_utils.sample, Invert=(I_ema if config['ema'] and config['use_ema'] else I), G=(G_ema if config['ema'] and config['use_ema'] else G), z_=z_, y_=y_, config=config) print('Beginning training at epoch %d...' % state_dict['epoch']) # Train for specified number of epochs, although we mostly track G iterations. for epoch in range(state_dict['epoch'], config['num_epochs']): # Which progressbar to use? TQDM or my own? if config['pbar'] == 'mine': pbar = utils.progress(loaders[0], displaytype='s1k' if config['use_multiepoch_sampler'] else 'eta') else: pbar = tqdm(loaders[0]) for i, (x, y) in enumerate(pbar): # Increment the iteration counter state_dict['itr'] += 1 # Make sure G and D are in training mode, just in case they got set to eval # For D, which typically doesn't have BN, this shouldn't matter much. G.train() D.train() I.train() E.train() L.train() if config['ema']: G_ema.train() I_ema.train() E_ema.train() if config['D_fp16']: x, y = x.to(device).half(), y.to(device) else: x, y = x.to(device), y.to(device) metrics = train(x) train_log.log(itr=int(state_dict['itr']), **metrics) # Every sv_log_interval, log singular values if (config['sv_log_interval'] > 0) and ( not (state_dict['itr'] % config['sv_log_interval'])): train_log.log(itr=int(state_dict['itr']), **{ **utils.get_SVs(G, 'G'), **utils.get_SVs(D, 'D'), **utils.get_SVs(I, 'Invert'), **utils.get_SVs(E, 'Encoder'), **utils.get_SVs(L, 'LatentBinder') }) # If using my progbar, print metrics. if config['pbar'] == 'mine': print(', '.join( ['itr: %d' % state_dict['itr']] + ['%s : %+4.3f' % (key, metrics[key]) for key in metrics]), end=' ') # Save weights and copies as configured at specified interval if not (state_dict['itr'] % config['save_every']): if config['G_eval_mode']: print('Switching G to eval mode...') G.eval() I.eval() E.eval() if config['ema']: G_ema.eval() I_ema.eval() E_ema.eval() train_vae_fns.save_and_sample(G, D, E, I, L, G_ema, I_ema, E_ema, z_, y_, fixed_z, fixed_y, fixed_x, state_dict, config, experiment_name) # Test every specified interval if not (state_dict['itr'] % config['test_every']): if config['G_eval_mode']: print('Switching G to eval mode...') G.eval() I.eval() E.eval() train_vae_fns.test(G, D, E, I, L, KNN, G_ema, I_ema, E_ema, z_, y_, state_dict, config, sample, get_inception_metrics, experiment_name, test_log) # Increment epoch counter at end of epoch state_dict['epoch'] += 1
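# NOTE: utils.prepare_z_y, used repeatedly above, returns z_/y_ objects that are refilled in place
# with fresh Gaussian noise and categorical labels via .sample_() before every generator call.
# The two helpers below are only a stateless stand-in for that convention, assuming standard-normal
# latents and uniform class labels; the names are illustrative, not the repo's API.
import torch


def sample_z(batch_size, dim_z, device='cpu'):
    # Standard-normal latent vectors, one row per sample.
    return torch.randn(batch_size, dim_z, device=device)


def sample_y(batch_size, n_classes, device='cpu'):
    # Uniformly sampled class labels in [0, n_classes).
    return torch.randint(0, n_classes, (batch_size,), device=device)


z_demo, y_demo = sample_z(8, 120), sample_y(8, 10)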
def run(config): if config['wandb_entity'] is not None: init_wandb(config, config['experiment_name'], config['wandb_entity'], 'imagenet') if config["G_path"] is None: # Download a pre-trained G if necessary download_G() config["G_path"] = 'checkpoints/138k' G, state_dict, device, experiment_name = load_G(config) # If parallel, parallelize the GD module if config['parallel']: G = nn.DataParallel(DataParallelLoss(G)) if config['cross_replica']: patch_replication_callback(G) num_gpus = torch.cuda.device_count() print(f'Using {num_gpus} GPUs') # If search_space != 'all', then we need to pad the z components that we are leaving alone: pad = get_direction_padding_fn(config) direction_size = config['dim_z'] if config[ 'search_space'] == 'all' else config['ndirs'] # A is our (ndirs, |z|) matrix of directions, where ndirs indicates the number of directions we want to learn if config['load_A'] == 'coords': print('Initializing with standard basis directions') A = torch.nn.Parameter(torch.eye(config['ndirs'], direction_size, device=device), requires_grad=True) elif config['load_A'] == 'random': print('Initializing with random directions') A = torch.nn.Parameter(torch.empty(config['ndirs'], direction_size, device=device), requires_grad=True) torch.nn.init.kaiming_normal_(A) else: raise NotImplementedError # We only learn A; G is left frozen during training: optim = torch.optim.Adam(params=[A], lr=config['A_lr']) # Allow for different batch sizes in G G_batch_size = max(config['G_batch_size'], config['batch_size']) z_, y_ = utils.prepare_z_y(G_batch_size, G.module.G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) # Prepare a fixed z & y to see individual sample evolution throughout training fixed_z, fixed_y = utils.prepare_z_y(G_batch_size, G.module.G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) fixed_z.sample_() fixed_y.sample_() interp_z, interp_y = utils.prepare_z_y(config["n_samples"], G.module.G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) interp_z.sample_() interp_y.sample_() if config['fix_class'] is not None: y_ = y_.new_full(y_.size(), config['fix_class']) fixed_y = fixed_y.new_full(fixed_y.size(), config['fix_class']) interp_y = interp_y.new_full(interp_y.size(), config['fix_class']) print('Beginning training at epoch %d...' % state_dict['epoch']) # Train for specified number of epochs, although we mostly track G iterations.
iters_per_epoch = 1000 dummy_loader = [None] * iters_per_epoch # We don't need any real data path_size = config['path_size'] # Simply stores a |z|-dimensional one-hot vector indicating each direction we are learning: direction_indicators = torch.eye(config['ndirs']).to(device) G.eval() G.module.optim = optim writer = SummaryWriter('%s/%s' % (config['logs_root'], experiment_name)) sample_sheet = train_fns.save_and_sample(G.module.G, None, G.module.G, z_, y_, fixed_z, fixed_y, state_dict, config, experiment_name) writer.add_image('samples', sample_sheet, 0) interp_y_ = G.module.G.shared(interp_y) # Make directions orthogonal via Gram Schmidt: Q = pad(fast_gram_schmidt(A)) if not config["no_ortho"] else pad(A) if config["vis_during_training"]: print("Generating initial visualizations...") interp_vis = visualize_directions(G.module.G, interp_z, interp_y_, path_sizes=path_size, Q=Q, high_quality=False, npv=1) for w_ix in range(config['ndirs']): writer.add_video('G_ema/w%03d' % w_ix, interp_vis[w_ix], 0, fps=24) for epoch in range(state_dict['epoch'], config['num_epochs']): if config['pbar'] == 'mine': pbar = utils.progress(dummy_loader, displaytype='s1k' if config['use_multiepoch_sampler'] else 'eta') else: pbar = tqdm(dummy_loader) for i, _ in enumerate(pbar): state_dict['itr'] += 1 z_.sample_() if config['fix_class'] is None: y_.sample_() y = G.module.G.shared(y_) sampled_directions = torch.randint(low=0, high=config['ndirs'], size=(G_batch_size, ), device=device) # Distances are sampled from U[-path_size, path_size]: distances = torch.rand(G_batch_size, 1, device=device).mul( 2 * path_size).add(-path_size) # w_sampled is an (N, ndirs)-shaped tensor. If i indexes batch elements and j indexes directions, then # w_sampled[i, j] represents how far we will move z[i] in the direction Q[j]. The final z[i] will be the sum # over all directions stored in the rows of Q. w_sampled = direction_indicators[sampled_directions] * distances # TODO: The Q.repeat below is a DataParallel hack to make sure each GPU gets the same copy of the Q matrix. # There is almost certainly a cleaner way to do this. # Hessian Penalty taken w.r.t. w_sampled, NOT z: penalty = G(z_, y, w=w_sampled, Q=Q.repeat(num_gpus, 1)).mean() optim.zero_grad() penalty.backward() optim.step() # re-orthogonalize A for visualizations and the next training iteration: Q = pad(fast_gram_schmidt(A)) if not config["no_ortho"] else pad(A) # Log metrics to TensorBoard/WandB: cur_training_iter = epoch * iters_per_epoch + i writer.add_scalar('Metrics/hessian_penalty', penalty.item(), cur_training_iter) writer.add_scalar('Metrics/direction_norm', A.pow(2).mean().pow(0.5).item(), cur_training_iter) # Save directions and log visuals: if not (state_dict['itr'] % config['save_every']): torch.save( A.cpu().detach(), '%s/%s/A_%06d.pt' % (config['weights_root'], experiment_name, cur_training_iter)) if config["vis_during_training"]: interp_vis = visualize_directions(G.module.G, interp_z, interp_y_, path_sizes=path_size, Q=Q, high_quality=False, npv=1) for w_ix in range(config['ndirs']): writer.add_video('G_ema/w%03d' % w_ix, interp_vis[w_ix], cur_training_iter, fps=24) state_dict['epoch'] += 1
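# NOTE: in the loop above, each batch element moves along exactly one learned direction: a
# direction index is drawn uniformly, a signed step size is drawn from U[-path_size, path_size],
# and the one-hot indicator times that distance gives w_sampled. The helper below restates that
# sampling on its own as a sketch; the function and argument names are illustrative, not the
# repo's API.
import torch


def sample_direction_shifts(batch_size, ndirs, path_size, device='cpu'):
    one_hot = torch.eye(ndirs, device=device)                     # one row per direction
    idx = torch.randint(0, ndirs, (batch_size,), device=device)   # which direction each sample moves along
    dist = torch.rand(batch_size, 1, device=device) * (2 * path_size) - path_size  # U[-path_size, path_size]
    return one_hot[idx] * dist                                    # (batch_size, ndirs), one nonzero entry per row


w_demo = sample_direction_shifts(batch_size=16, ndirs=32, path_size=3.0)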