def matrix(*rows, device=None): assert all(len(row) == len(rows[0]) for row in rows) elems = [x for row in rows for x in row] ref = [x for x in elems if isinstance(x, torch.Tensor)] if len(ref) == 0: return misc.constant(np.asarray(rows), device=device) assert device is None or device == ref[0].device elems = [ x if isinstance(x, torch.Tensor) else misc.constant(x, shape=ref[0].shape, device=ref[0].device) for x in elems ] return torch.stack(elems, dim=-1).reshape(ref[0].shape + (len(rows), -1))
def on_train_batch_end(self, outputs, batch, batch_idx, dataloader_idx): # Update G_ema. ema_nimg = self.ema_kimg * 1000 batch_size = batch[0].shape[0] self.cur_nimg = (self.global_step + 1) * batch_size if self.ema_rampup is not None: ema_nimg = min(ema_nimg, self.cur_nimg * self.ema_rampup) ema_beta = 0.5 ** (batch_size / max(ema_nimg, 1e-8)) for p_ema, p in zip(self.G_ema.parameters(), self.G.parameters()): p_ema.copy_(p.lerp(p_ema, ema_beta)) for b_ema, b in zip(self.G_ema.buffers(), self.G.buffers()): b_ema.copy_(b) # Execute ADA heuristic. if self.augment_pipe is not None and self.ada_target is not None and (self.global_step % self.ada_interval == 0): logit_sign = self.logit_sign.compute() adjust = np.sign(logit_sign - self.ada_target) * (batch_size * self.ada_interval) / (self.ada_kimg * 1000) self.augment_pipe.p.copy_((self.augment_pipe.p + adjust).max(misc.constant(0, device=self.device)))
def forward(self, images, debug_percentile=None): assert isinstance(images, torch.Tensor) and images.ndim == 4 batch_size, num_channels, height, width = images.shape device = images.device if debug_percentile is not None: debug_percentile = torch.as_tensor(debug_percentile, dtype=torch.float32, device=device) # ------------------------------------- # Select parameters for pixel blitting. # ------------------------------------- # Initialize inverse homogeneous 2D transform: G_inv @ pixel_out ==> pixel_in I_3 = torch.eye(3, device=device) G_inv = I_3 # Apply x-flip with probability (xflip * strength). if self.xflip > 0: i = torch.floor(torch.rand([batch_size], device=device) * 2) i = torch.where( torch.rand([batch_size], device=device) < self.xflip * self.p, i, torch.zeros_like(i)) if debug_percentile is not None: i = torch.full_like(i, torch.floor(debug_percentile * 2)) G_inv = G_inv @ scale2d_inv(1 - 2 * i, 1) # Apply 90 degree rotations with probability (rotate90 * strength). if self.rotate90 > 0: i = torch.floor(torch.rand([batch_size], device=device) * 4) i = torch.where( torch.rand([batch_size], device=device) < self.rotate90 * self.p, i, torch.zeros_like(i)) if debug_percentile is not None: i = torch.full_like(i, torch.floor(debug_percentile * 4)) G_inv = G_inv @ rotate2d_inv(-np.pi / 2 * i) # Apply integer translation with probability (xint * strength). if self.xint > 0: t = (torch.rand([batch_size, 2], device=device) * 2 - 1) * self.xint_max t = torch.where( torch.rand([batch_size, 1], device=device) < self.xint * self.p, t, torch.zeros_like(t)) if debug_percentile is not None: t = torch.full_like(t, (debug_percentile * 2 - 1) * self.xint_max) G_inv = G_inv @ translate2d_inv(torch.round(t[:, 0] * width), torch.round(t[:, 1] * height)) # -------------------------------------------------------- # Select parameters for general geometric transformations. # -------------------------------------------------------- # Apply isotropic scaling with probability (scale * strength). if self.scale > 0: s = torch.exp2( torch.randn([batch_size], device=device) * self.scale_std) s = torch.where( torch.rand([batch_size], device=device) < self.scale * self.p, s, torch.ones_like(s)) if debug_percentile is not None: s = torch.full_like( s, torch.exp2( torch.erfinv(debug_percentile * 2 - 1) * self.scale_std)) G_inv = G_inv @ scale2d_inv(s, s) # Apply pre-rotation with probability p_rot. p_rot = 1 - torch.sqrt( (1 - self.rotate * self.p).clamp(0, 1)) # P(pre OR post) = p if self.rotate > 0: theta = (torch.rand([batch_size], device=device) * 2 - 1) * np.pi * self.rotate_max theta = torch.where( torch.rand([batch_size], device=device) < p_rot, theta, torch.zeros_like(theta)) if debug_percentile is not None: theta = torch.full_like(theta, (debug_percentile * 2 - 1) * np.pi * self.rotate_max) G_inv = G_inv @ rotate2d_inv(-theta) # Before anisotropic scaling. # Apply anisotropic scaling with probability (aniso * strength). if self.aniso > 0: s = torch.exp2( torch.randn([batch_size], device=device) * self.aniso_std) s = torch.where( torch.rand([batch_size], device=device) < self.aniso * self.p, s, torch.ones_like(s)) if debug_percentile is not None: s = torch.full_like( s, torch.exp2( torch.erfinv(debug_percentile * 2 - 1) * self.aniso_std)) G_inv = G_inv @ scale2d_inv(s, 1 / s) # Apply post-rotation with probability p_rot. if self.rotate > 0: theta = (torch.rand([batch_size], device=device) * 2 - 1) * np.pi * self.rotate_max theta = torch.where( torch.rand([batch_size], device=device) < p_rot, theta, torch.zeros_like(theta)) if debug_percentile is not None: theta = torch.zeros_like(theta) G_inv = G_inv @ rotate2d_inv(-theta) # After anisotropic scaling. # Apply fractional translation with probability (xfrac * strength). if self.xfrac > 0: t = torch.randn([batch_size, 2], device=device) * self.xfrac_std t = torch.where( torch.rand([batch_size, 1], device=device) < self.xfrac * self.p, t, torch.zeros_like(t)) if debug_percentile is not None: t = torch.full_like( t, torch.erfinv(debug_percentile * 2 - 1) * self.xfrac_std) G_inv = G_inv @ translate2d_inv(t[:, 0] * width, t[:, 1] * height) # ---------------------------------- # Execute geometric transformations. # ---------------------------------- # Execute if the transform is not identity. if G_inv is not I_3: # Calculate padding. cx = (width - 1) / 2 cy = (height - 1) / 2 cp = matrix([-cx, -cy, 1], [cx, -cy, 1], [cx, cy, 1], [-cx, cy, 1], device=device) # [idx, xyz] cp = G_inv @ cp.t() # [batch, xyz, idx] Hz_pad = self.Hz_geom.shape[0] // 4 margin = cp[:, :2, :].permute(1, 0, 2).flatten(1) # [xy, batch * idx] margin = torch.cat([-margin, margin]).max(dim=1).values # [x0, y0, x1, y1] margin = margin + misc.constant( [Hz_pad * 2 - cx, Hz_pad * 2 - cy] * 2, device=device) margin = margin.max(misc.constant([0, 0] * 2, device=device)) margin = margin.min( misc.constant([width - 1, height - 1] * 2, device=device)) mx0, my0, mx1, my1 = margin.ceil().to(torch.int32) # Pad image and adjust origin. images = torch.nn.functional.pad(input=images, pad=[mx0, mx1, my0, my1], mode='reflect') G_inv = translate2d((mx0 - mx1) / 2, (my0 - my1) / 2) @ G_inv # Upsample. images = upfirdn2d.upsample2d(x=images, f=self.Hz_geom, up=2) G_inv = scale2d(2, 2, device=device) @ G_inv @ scale2d_inv( 2, 2, device=device) G_inv = translate2d(-0.5, -0.5, device=device) @ G_inv @ translate2d_inv( -0.5, -0.5, device=device) # Execute transformation. shape = [ batch_size, num_channels, (height + Hz_pad * 2) * 2, (width + Hz_pad * 2) * 2 ] G_inv = scale2d(2 / images.shape[3], 2 / images.shape[2], device=device) @ G_inv @ scale2d_inv( 2 / shape[3], 2 / shape[2], device=device) grid = torch.nn.functional.affine_grid(theta=G_inv[:, :2, :], size=shape, align_corners=False) images = grid_sample_gradfix.grid_sample(images, grid) # Downsample and crop. images = upfirdn2d.downsample2d(x=images, f=self.Hz_geom, down=2, padding=-Hz_pad * 2, flip_filter=True) # -------------------------------------------- # Select parameters for color transformations. # -------------------------------------------- # Initialize homogeneous 3D transformation matrix: C @ color_in ==> color_out I_4 = torch.eye(4, device=device) C = I_4 # Apply brightness with probability (brightness * strength). if self.brightness > 0: b = torch.randn([batch_size], device=device) * self.brightness_std b = torch.where( torch.rand([batch_size], device=device) < self.brightness * self.p, b, torch.zeros_like(b)) if debug_percentile is not None: b = torch.full_like( b, torch.erfinv(debug_percentile * 2 - 1) * self.brightness_std) C = translate3d(b, b, b) @ C # Apply contrast with probability (contrast * strength). if self.contrast > 0: c = torch.exp2( torch.randn([batch_size], device=device) * self.contrast_std) c = torch.where( torch.rand([batch_size], device=device) < self.contrast * self.p, c, torch.ones_like(c)) if debug_percentile is not None: c = torch.full_like( c, torch.exp2( torch.erfinv(debug_percentile * 2 - 1) * self.contrast_std)) C = scale3d(c, c, c) @ C # Apply luma flip with probability (lumaflip * strength). v = misc.constant(np.asarray([1, 1, 1, 0]) / np.sqrt(3), device=device) # Luma axis. if self.lumaflip > 0: i = torch.floor(torch.rand([batch_size, 1, 1], device=device) * 2) i = torch.where( torch.rand([batch_size, 1, 1], device=device) < self.lumaflip * self.p, i, torch.zeros_like(i)) if debug_percentile is not None: i = torch.full_like(i, torch.floor(debug_percentile * 2)) C = (I_4 - 2 * v.ger(v) * i) @ C # Householder reflection. # Apply hue rotation with probability (hue * strength). if self.hue > 0 and num_channels > 1: theta = (torch.rand([batch_size], device=device) * 2 - 1) * np.pi * self.hue_max theta = torch.where( torch.rand([batch_size], device=device) < self.hue * self.p, theta, torch.zeros_like(theta)) if debug_percentile is not None: theta = torch.full_like(theta, (debug_percentile * 2 - 1) * np.pi * self.hue_max) C = rotate3d(v, theta) @ C # Rotate around v. # Apply saturation with probability (saturation * strength). if self.saturation > 0 and num_channels > 1: s = torch.exp2( torch.randn([batch_size, 1, 1], device=device) * self.saturation_std) s = torch.where( torch.rand([batch_size, 1, 1], device=device) < self.saturation * self.p, s, torch.ones_like(s)) if debug_percentile is not None: s = torch.full_like( s, torch.exp2( torch.erfinv(debug_percentile * 2 - 1) * self.saturation_std)) C = (v.ger(v) + (I_4 - v.ger(v)) * s) @ C # ------------------------------ # Execute color transformations. # ------------------------------ # Execute if the transform is not identity. if C is not I_4: images = images.reshape([batch_size, num_channels, height * width]) if num_channels == 4: alpha = images[:, 3, :].unsqueeze(dim=1) # [batch_size, 1, ...] rgb = C[:, :3, : 3] @ images[:, :3, :] + C[:, :3, 3:] # [batch_size, 3, ...] images = torch.cat([rgb, alpha], dim=1) # [batch_size, 4, ...] elif num_channels == 3: images = C[:, :3, :3] @ images + C[:, :3, 3:] elif num_channels == 1: C = C[:, :3, :].mean(dim=1, keepdims=True) images = images * C[:, :, :3].sum(dim=2, keepdims=True) + C[:, :, 3:] else: raise ValueError( 'Image must be RGBA (4 channels), RGB (3 channels) or L (1 channel)' ) images = images.reshape([batch_size, num_channels, height, width]) # ---------------------- # Image-space filtering. # ---------------------- if self.imgfilter > 0: num_bands = self.Hz_fbank.shape[0] assert len(self.imgfilter_bands) == num_bands expected_power = misc.constant( np.array([10, 1, 1, 1]) / 13, device=device) # Expected power spectrum (1/f). # Apply amplification for each band with probability (imgfilter * strength * band_strength). g = torch.ones([batch_size, num_bands], device=device) # Global gain vector (identity). for i, band_strength in enumerate(self.imgfilter_bands): t_i = torch.exp2( torch.randn([batch_size], device=device) * self.imgfilter_std) t_i = torch.where( torch.rand([batch_size], device=device) < self.imgfilter * self.p * band_strength, t_i, torch.ones_like(t_i)) if debug_percentile is not None: t_i = torch.full_like( t_i, torch.exp2( torch.erfinv(debug_percentile * 2 - 1) * self.imgfilter_std) ) if band_strength > 0 else torch.ones_like(t_i) t = torch.ones([batch_size, num_bands], device=device) # Temporary gain vector. t[:, i] = t_i # Replace i'th element. t = t / (expected_power * t.square()).sum( dim=-1, keepdims=True).sqrt() # Normalize power. g = g * t # Accumulate into global gain. # Construct combined amplification filter. Hz_prime = g @ self.Hz_fbank # [batch, tap] Hz_prime = Hz_prime.unsqueeze(1).repeat( [1, num_channels, 1]) # [batch, channels, tap] Hz_prime = Hz_prime.reshape([batch_size * num_channels, 1, -1]) # [batch * channels, 1, tap] # Apply filter. p = self.Hz_fbank.shape[1] // 2 images = images.reshape( [1, batch_size * num_channels, height, width]) images = torch.nn.functional.pad(input=images, pad=[p, p, p, p], mode='reflect') images = conv2d_gradfix.conv2d(input=images, weight=Hz_prime.unsqueeze(2), groups=batch_size * num_channels) images = conv2d_gradfix.conv2d(input=images, weight=Hz_prime.unsqueeze(3), groups=batch_size * num_channels) images = images.reshape([batch_size, num_channels, height, width]) # ------------------------ # Image-space corruptions. # ------------------------ # Apply additive RGB noise with probability (noise * strength). if self.noise > 0: sigma = torch.randn([batch_size, 1, 1, 1], device=device).abs() * self.noise_std sigma = torch.where( torch.rand([batch_size, 1, 1, 1], device=device) < self.noise * self.p, sigma, torch.zeros_like(sigma)) if debug_percentile is not None: sigma = torch.full_like( sigma, torch.erfinv(debug_percentile) * self.noise_std) images = images + torch.randn( [batch_size, num_channels, height, width], device=device) * sigma # Apply cutout with probability (cutout * strength). if self.cutout > 0: size = torch.full([batch_size, 2, 1, 1, 1], self.cutout_size, device=device) size = torch.where( torch.rand([batch_size, 1, 1, 1, 1], device=device) < self.cutout * self.p, size, torch.zeros_like(size)) center = torch.rand([batch_size, 2, 1, 1, 1], device=device) if debug_percentile is not None: size = torch.full_like(size, self.cutout_size) center = torch.full_like(center, debug_percentile) coord_x = torch.arange(width, device=device).reshape([1, 1, 1, -1]) coord_y = torch.arange(height, device=device).reshape([1, 1, -1, 1]) mask_x = (((coord_x + 0.5) / width - center[:, 0]).abs() >= size[:, 0] / 2) mask_y = (((coord_y + 0.5) / height - center[:, 1]).abs() >= size[:, 1] / 2) mask = torch.logical_or(mask_x, mask_y).to(torch.float32) images = images * mask return images
def training_loop( run_dir='.', # Output directory. training_set_kwargs={}, # Options for training set. data_loader_kwargs={}, # Options for torch.utils.data.DataLoader. G_kwargs={}, # Options for generator network. D_kwargs={}, # Options for discriminator network. D2_kwargs={}, # Options for discriminator network. G_opt_kwargs={}, # Options for generator optimizer. D_opt_kwargs={}, # Options for discriminator optimizer. augment_kwargs=None, # Options for augmentation pipeline. None = disable. loss_kwargs={}, # Options for loss function. metrics=[], # Metrics to evaluate during training. random_seed=0, # Global random seed. num_gpus=1, # Number of GPUs participating in the training. rank=0, # Rank of the current process in [0, num_gpus[. batch_size=4, # Total batch size for one training iteration. Can be larger than batch_gpu * num_gpus. batch_gpu=4, # Number of samples processed at a time by one GPU. ema_kimg=10, # Half-life of the exponential moving average (EMA) of generator weights. ema_rampup=None, # EMA ramp-up coefficient. G_reg_interval=4, # How often to perform regularization for G? None = disable lazy regularization. D_reg_interval=16, # How often to perform regularization for D? None = disable lazy regularization. augment_p=0, # Initial value of augmentation probability. ada_target=None, # ADA target value. None = fixed p. ada_interval=4, # How often to perform ADA adjustment? ada_kimg=500, # ADA adjustment speed, measured in how many kimg it takes for p to increase/decrease by one unit. total_kimg=25000, # Total length of the training, measured in thousands of real images. kimg_per_tick=4, # Progress snapshot interval. image_snapshot_ticks=50, # How often to save image snapshots? None = disable. network_snapshot_ticks=50, # How often to save network snapshots? None = disable. resume_pkl=None, # Network pickle to resume training from. cudnn_benchmark=True, # Enable torch.backends.cudnn.benchmark? abort_fn=None, # Callback function for determining whether to abort training. Must return consistent results across ranks. progress_fn=None, # Callback function for updating training progress. Called for all ranks. obake=None, # Obake training: <bool>, default = False ): # Initialize. start_time = time.time() device = torch.device('cuda', rank) np.random.seed(random_seed * num_gpus + rank) torch.manual_seed(random_seed * num_gpus + rank) torch.backends.cudnn.benchmark = cudnn_benchmark # Improves training speed. conv2d_gradfix.enabled = True # Improves training speed. grid_sample_gradfix.enabled = True # Avoids errors with the augmentation pipe. # Load training set. if rank == 0: print('Loading training set...') training_set = dnnlib.util.construct_class_by_name( **training_set_kwargs) # subclass of training.dataset.Dataset training_set_sampler = misc.InfiniteSampler(dataset=training_set, rank=rank, num_replicas=num_gpus, seed=random_seed) training_set_iterator = iter( torch.utils.data.DataLoader(dataset=training_set, sampler=training_set_sampler, batch_size=batch_size // num_gpus, **data_loader_kwargs)) if rank == 0: print() print('Num images: ', len(training_set)) print('Image shape:', training_set.image_shape) print('Label shape:', training_set.label_shape) print() # Construct networks. if rank == 0: print('Constructing networks...') common_kwargs = dict(c_dim=training_set.label_dim, img_resolution=training_set.resolution, img_channels=training_set.num_channels) G = dnnlib.util.construct_class_by_name( **G_kwargs, **common_kwargs).train().requires_grad_(False).to( device) # subclass of torch.nn.Module D = dnnlib.util.construct_class_by_name( **D_kwargs, **common_kwargs).train().requires_grad_(False).to( device) # subclass of torch.nn.Module G_ema = copy.deepcopy(G).eval() if obake is not None: D_mtcnn = MTCNN(image_size=D2_kwargs.mtcnn_output_size, margin=D2_kwargs.mtcnn_output_margin, thresholds=D2_kwargs.mtcnn_thresholds) D_face = InceptionResnetV1(pretrained=D2_kwargs.resnet_type).eval() # Resume from existing pickle. if (resume_pkl is not None) and (rank == 0): print(f'Resuming from "{resume_pkl}"') with dnnlib.util.open_url(resume_pkl) as f: resume_data = legacy.load_network_pkl(f) for name, module in [('G', G), ('D', D), ('G_ema', G_ema)]: misc.copy_params_and_buffers(resume_data[name], module, require_all=False) # Print network summary tables. if rank == 0: z = torch.empty([batch_gpu, G.z_dim], device=device) c = torch.empty([batch_gpu, G.c_dim], device=device) img = misc.print_module_summary(G, [z, c]) misc.print_module_summary(D, [img, c]) # Setup augmentation. if rank == 0: print('Setting up augmentation...') augment_pipe = None ada_stats = None if (augment_kwargs is not None) and (augment_p > 0 or ada_target is not None): augment_pipe = dnnlib.util.construct_class_by_name( **augment_kwargs).train().requires_grad_(False).to( device) # subclass of torch.nn.Module augment_pipe.p.copy_(torch.as_tensor(augment_p)) if ada_target is not None: ada_stats = training_stats.Collector(regex='Loss/signs/real') # Distribute across GPUs. if rank == 0: print(f'Distributing across {num_gpus} GPUs...') ddp_modules = dict() if obake is not None: for name, module in [('G_mapping', G.mapping), ('G_synthesis', G.synthesis), ('D', D), ('D_mtcnn', D_mtcnn), ('D_face', D_face), (None, G_ema), ('augment_pipe', augment_pipe)]: if (num_gpus > 1) and (module is not None) and len( list(module.parameters())) != 0: module.requires_grad_(True) module = torch.nn.parallel.DistributedDataParallel( module, device_ids=[device], broadcast_buffers=False) module.requires_grad_(False) if name is not None: ddp_modules[name] = module else: for name, module in [('G_mapping', G.mapping), ('G_synthesis', G.synthesis), ('D', D), (None, G_ema), ('augment_pipe', augment_pipe)]: if (num_gpus > 1) and (module is not None) and len( list(module.parameters())) != 0: module.requires_grad_(True) module = torch.nn.parallel.DistributedDataParallel( module, device_ids=[device], broadcast_buffers=False) module.requires_grad_(False) if name is not None: ddp_modules[name] = module # Setup training phases. if rank == 0: print('Setting up training phases...') loss = dnnlib.util.construct_class_by_name( device=device, **ddp_modules, **loss_kwargs) # subclass of training.loss.Loss phases = [] for name, module, opt_kwargs, reg_interval in [ ('G', G, G_opt_kwargs, G_reg_interval), ('D', D, D_opt_kwargs, D_reg_interval) ]: if reg_interval is None: opt = dnnlib.util.construct_class_by_name( params=module.parameters(), **opt_kwargs) # subclass of torch.optim.Optimizer phases += [ dnnlib.EasyDict(name=name + 'both', module=module, opt=opt, interval=1) ] else: # Lazy regularization. mb_ratio = reg_interval / (reg_interval + 1) opt_kwargs = dnnlib.EasyDict(opt_kwargs) opt_kwargs.lr = opt_kwargs.lr * mb_ratio opt_kwargs.betas = [beta**mb_ratio for beta in opt_kwargs.betas] opt = dnnlib.util.construct_class_by_name( module.parameters(), **opt_kwargs) # subclass of torch.optim.Optimizer phases += [ dnnlib.EasyDict(name=name + 'main', module=module, opt=opt, interval=1) ] phases += [ dnnlib.EasyDict(name=name + 'reg', module=module, opt=opt, interval=reg_interval) ] for phase in phases: phase.start_event = None phase.end_event = None if rank == 0: phase.start_event = torch.cuda.Event(enable_timing=True) phase.end_event = torch.cuda.Event(enable_timing=True) # Export sample images. grid_size = None grid_z = None grid_c = None if rank == 0: print('Exporting sample images...') grid_size, images, labels = setup_snapshot_image_grid( training_set=training_set) save_image_grid(images, os.path.join(run_dir, 'reals.png'), drange=[0, 255], grid_size=grid_size) grid_z = torch.randn([labels.shape[0], G.z_dim], device=device).split(batch_gpu) grid_c = torch.from_numpy(labels).to(device).split(batch_gpu) images = torch.cat([ G_ema(z=z, c=c, noise_mode='const').cpu() for z, c in zip(grid_z, grid_c) ]).numpy() save_image_grid(images, os.path.join(run_dir, 'fakes_init.png'), drange=[-1, 1], grid_size=grid_size) # Initialize logs. if rank == 0: print('Initializing logs...') stats_collector = training_stats.Collector(regex='.*') stats_metrics = dict() stats_jsonl = None stats_tfevents = None if rank == 0: stats_jsonl = open(os.path.join(run_dir, 'stats.jsonl'), 'wt') try: import torch.utils.tensorboard as tensorboard stats_tfevents = tensorboard.SummaryWriter(run_dir) except ImportError as err: print('Skipping tfevents export:', err) # Train. if rank == 0: print(f'Training for {total_kimg} kimg...') print() cur_nimg = 0 cur_tick = 0 tick_start_nimg = cur_nimg tick_start_time = time.time() maintenance_time = tick_start_time - start_time batch_idx = 0 if progress_fn is not None: progress_fn(0, total_kimg) while True: # Fetch training data. with torch.autograd.profiler.record_function('data_fetch'): phase_real_img, phase_real_c = next(training_set_iterator) phase_real_img = ( phase_real_img.to(device).to(torch.float32) / 127.5 - 1).split(batch_gpu) phase_real_c = phase_real_c.to(device).split(batch_gpu) all_gen_z = torch.randn([len(phases) * batch_size, G.z_dim], device=device) all_gen_z = [ phase_gen_z.split(batch_gpu) for phase_gen_z in all_gen_z.split(batch_size) ] all_gen_c = [ training_set.get_label(np.random.randint(len(training_set))) for _ in range(len(phases) * batch_size) ] all_gen_c = torch.from_numpy( np.stack(all_gen_c)).pin_memory().to(device) all_gen_c = [ phase_gen_c.split(batch_gpu) for phase_gen_c in all_gen_c.split(batch_size) ] # Execute training phases. for phase, phase_gen_z, phase_gen_c in zip(phases, all_gen_z, all_gen_c): if batch_idx % phase.interval != 0: continue # Initialize gradient accumulation. if phase.start_event is not None: phase.start_event.record(torch.cuda.current_stream(device)) phase.opt.zero_grad(set_to_none=True) phase.module.requires_grad_(True) # Accumulate gradients over multiple rounds. for round_idx, (real_img, real_c, gen_z, gen_c) in enumerate( zip(phase_real_img, phase_real_c, phase_gen_z, phase_gen_c)): sync = (round_idx == batch_size // (batch_gpu * num_gpus) - 1) gain = phase.interval print(phase.name) loss.accumulate_gradients(phase=phase.name, real_img=real_img, real_c=real_c, gen_z=gen_z, gen_c=gen_c, sync=sync, gain=gain) # Update weights. phase.module.requires_grad_(False) with torch.autograd.profiler.record_function(phase.name + '_opt'): for param in phase.module.parameters(): if param.grad is not None: misc.nan_to_num(param.grad, nan=0, posinf=1e5, neginf=-1e5, out=param.grad) phase.opt.step() if phase.end_event is not None: phase.end_event.record(torch.cuda.current_stream(device)) # Update G_ema. with torch.autograd.profiler.record_function('Gema'): ema_nimg = ema_kimg * 1000 if ema_rampup is not None: ema_nimg = min(ema_nimg, cur_nimg * ema_rampup) ema_beta = 0.5**(batch_size / max(ema_nimg, 1e-8)) for p_ema, p in zip(G_ema.parameters(), G.parameters()): p_ema.copy_(p.lerp(p_ema, ema_beta)) for b_ema, b in zip(G_ema.buffers(), G.buffers()): b_ema.copy_(b) # Update state. cur_nimg += batch_size batch_idx += 1 # Execute ADA heuristic. if (ada_stats is not None) and (batch_idx % ada_interval == 0): ada_stats.update() adjust = np.sign(ada_stats['Loss/signs/real'] - ada_target) * ( batch_size * ada_interval) / (ada_kimg * 1000) augment_pipe.p.copy_( (augment_pipe.p + adjust).max(misc.constant(0, device=device))) # Perform maintenance tasks once per tick. done = (cur_nimg >= total_kimg * 1000) if (not done) and (cur_tick != 0) and ( cur_nimg < tick_start_nimg + kimg_per_tick * 1000): continue # Print status line, accumulating the same information in stats_collector. tick_end_time = time.time() fields = [] fields += [ f"tick {training_stats.report0('Progress/tick', cur_tick):<5d}" ] fields += [ f"kimg {training_stats.report0('Progress/kimg', cur_nimg / 1e3):<8.1f}" ] fields += [ f"time {dnnlib.util.format_time(training_stats.report0('Timing/total_sec', tick_end_time - start_time)):<12s}" ] fields += [ f"sec/tick {training_stats.report0('Timing/sec_per_tick', tick_end_time - tick_start_time):<7.1f}" ] fields += [ f"sec/kimg {training_stats.report0('Timing/sec_per_kimg', (tick_end_time - tick_start_time) / (cur_nimg - tick_start_nimg) * 1e3):<7.2f}" ] fields += [ f"maintenance {training_stats.report0('Timing/maintenance_sec', maintenance_time):<6.1f}" ] fields += [ f"cpumem {training_stats.report0('Resources/cpu_mem_gb', psutil.Process(os.getpid()).memory_info().rss / 2**30):<6.2f}" ] fields += [ f"gpumem {training_stats.report0('Resources/peak_gpu_mem_gb', torch.cuda.max_memory_allocated(device) / 2**30):<6.2f}" ] torch.cuda.reset_peak_memory_stats() fields += [ f"augment {training_stats.report0('Progress/augment', float(augment_pipe.p.cpu()) if augment_pipe is not None else 0):.3f}" ] training_stats.report0('Timing/total_hours', (tick_end_time - start_time) / (60 * 60)) training_stats.report0('Timing/total_days', (tick_end_time - start_time) / (24 * 60 * 60)) if rank == 0: print(' '.join(fields)) # Check for abort. if (not done) and (abort_fn is not None) and abort_fn(): done = True if rank == 0: print() print('Aborting...') # Save image snapshot. if (rank == 0) and (image_snapshot_ticks is not None) and ( done or cur_tick % image_snapshot_ticks == 0): images = torch.cat([ G_ema(z=z, c=c, noise_mode='const').cpu() for z, c in zip(grid_z, grid_c) ]).numpy() save_image_grid(images, os.path.join(run_dir, f'fakes{cur_nimg//1000:06d}.png'), drange=[-1, 1], grid_size=grid_size) # Save network snapshot. snapshot_pkl = None snapshot_data = None if (network_snapshot_ticks is not None) and (done or cur_tick % network_snapshot_ticks == 0): snapshot_data = dict(training_set_kwargs=dict(training_set_kwargs)) for name, module in [('G', G), ('D', D), ('G_ema', G_ema), ('augment_pipe', augment_pipe)]: if module is not None: if num_gpus > 1: misc.check_ddp_consistency(module, ignore_regex=r'.*\.w_avg') module = copy.deepcopy(module).eval().requires_grad_( False).cpu() snapshot_data[name] = module del module # conserve memory snapshot_pkl = os.path.join( run_dir, f'network-snapshot-{cur_nimg//1000:06d}.pkl') if rank == 0: with open(snapshot_pkl, 'wb') as f: pickle.dump(snapshot_data, f) # Evaluate metrics. if (snapshot_data is not None) and (len(metrics) > 0): if rank == 0: print('Evaluating metrics...') for metric in metrics: result_dict = metric_main.calc_metric( metric=metric, G=snapshot_data['G_ema'], dataset_kwargs=training_set_kwargs, num_gpus=num_gpus, rank=rank, device=device) if rank == 0: metric_main.report_metric(result_dict, run_dir=run_dir, snapshot_pkl=snapshot_pkl) stats_metrics.update(result_dict.results) del snapshot_data # conserve memory # Collect statistics. for phase in phases: value = [] if (phase.start_event is not None) and (phase.end_event is not None): phase.end_event.synchronize() value = phase.start_event.elapsed_time(phase.end_event) training_stats.report0('Timing/' + phase.name, value) stats_collector.update() stats_dict = stats_collector.as_dict() # Update logs. timestamp = time.time() if stats_jsonl is not None: fields = dict(stats_dict, timestamp=timestamp) stats_jsonl.write(json.dumps(fields) + '\n') stats_jsonl.flush() if stats_tfevents is not None: global_step = int(cur_nimg / 1e3) walltime = timestamp - start_time for name, value in stats_dict.items(): stats_tfevents.add_scalar(name, value.mean, global_step=global_step, walltime=walltime) for name, value in stats_metrics.items(): stats_tfevents.add_scalar(f'Metrics/{name}', value, global_step=global_step, walltime=walltime) stats_tfevents.flush() if progress_fn is not None: progress_fn(cur_nimg // 1000, total_kimg) # Update state. cur_tick += 1 tick_start_nimg = cur_nimg tick_start_time = time.time() maintenance_time = tick_start_time - tick_end_time if done: break # Done. if rank == 0: print() print('Exiting...')
def adjust_p(self): self.ada_stats.update() adjust = np.sign(self.ada_stats['Loss/signs/real'] - self.ada_target) * (self.batch_size * self.ada_interval) / self.ada_kimg self.augment.p.copy_((self.augment.p + adjust).max(misc.constant(0, device=self.device)))
def training_loop( run_dir='.', # Output directory. training_set_kwargs={}, # Options for training set. data_loader_kwargs={}, # Options for torch.utils.data.DataLoader. G_kwargs={}, # Options for generator network. D_kwargs={}, # Options for discriminator network. G_opt_kwargs={}, # Options for generator optimizer. D_opt_kwargs={}, # Options for discriminator optimizer. augment_kwargs=None, # Options for augmentation pipeline. None = disable. loss_kwargs={}, # Options for loss function. savenames=None, # random_seed=0, # Global random seed. num_gpus=1, # Number of GPUs participating in the training. rank=0, # Rank of the current process in [0, num_gpus[. batch_size=4, # Total batch size for one training iteration. Can be larger than batch_gpu * num_gpus. batch_gpu=4, # Number of samples processed at a time by one GPU. ema_kimg=10, # Half-life of the exponential moving average (EMA) of generator weights. ema_rampup=None, # EMA ramp-up coefficient. G_reg_interval=4, # How often to perform regularization for G? None = disable lazy regularization. D_reg_interval=16, # How often to perform regularization for D? None = disable lazy regularization. augment_p=0, # Initial value of augmentation probability. ada_target=None, # ADA target value. None = fixed p. ada_interval=4, # How often to perform ADA adjustment? ada_kimg=500, # ADA adjustment speed, measured in how many kimg it takes for p to increase/decrease by one unit. total_kimg=25000, # Total length of the training, measured in thousands of real images. kimg_per_tick=4, # Progress snapshot interval. image_snapshot_ticks=50, # How often to save image snapshots? None = disable. network_snapshot_ticks=50, # How often to save network snapshots? None = disable. resume_pkl=None, # Network pickle to resume training from. resume_kimg=0.0, # Assumed training progress at the beginning. Affects reporting and training schedule. cudnn_benchmark=True, # Enable torch.backends.cudnn.benchmark? abort_fn=None, # Callback function for determining whether to abort training. Must return consistent results across ranks. progress_fn=None, # Callback function for updating training progress. Called for all ranks. **kwargs ): # Initialize. start_time = time.time() device = torch.device('cuda', rank) np.random.seed(random_seed * num_gpus + rank) torch.manual_seed(random_seed * num_gpus + rank) torch.backends.cudnn.benchmark = cudnn_benchmark # Improves training speed. conv2d_gradfix.enabled = True # Improves training speed. grid_sample_gradfix.enabled = True # Avoids errors with the augmentation pipe. run_id = str(int(time.time()))[3:-2] hostname = str(socket.gethostname()) # Load training set. # if rank == 0: # print('Loading training set...') training_set = dnnlib.util.construct_class_by_name(**training_set_kwargs) # subclass of training.dataset.Dataset training_set_sampler = misc.InfiniteSampler(dataset=training_set, rank=rank, num_replicas=num_gpus, seed=random_seed) training_set_iterator = iter(torch.utils.data.DataLoader(dataset=training_set, sampler=training_set_sampler, batch_size=batch_size // num_gpus, **data_loader_kwargs)) if rank == 0: # print('Num images: ', len(training_set)) print('Image shape:', training_set.image_shape) print('Label shape:', training_set.label_shape) ext = 'png' if training_set.image_shape[0] == 4 else 'jpg' # !!! # Construct networks. if rank == 0: print('Constructing networks...') common_kwargs = dict(c_dim=training_set.label_dim, img_resolution=training_set.resolution, img_channels=training_set.num_channels) G = dnnlib.util.construct_class_by_name(**G_kwargs, **common_kwargs).train().requires_grad_(False).to( device) # subclass of torch.nn.Module D = dnnlib.util.construct_class_by_name(**D_kwargs, **common_kwargs).train().requires_grad_(False).to( device) # subclass of torch.nn.Module G_ema = copy.deepcopy(G).eval() # Resume from existing pickle. if (resume_pkl is not None) and (rank == 0): # !!! if os.path.isdir(resume_pkl): resume_pkl, resume_kimg = locate_latest_pkl(resume_pkl) print(' Resuming from "%s", kimg %.3g' % (resume_pkl, resume_kimg)) with dnnlib.util.open_url(resume_pkl) as f: resume_data = legacy.load_network_pkl(f) for name, module in [('G', G), ('D', D), ('G_ema', G_ema)]: misc.copy_params_and_buffers(resume_data[name], module, require_all=False) # Print network summary tables. if rank == 0: # z = torch.empty([batch_gpu, G.z_dim], device=device) # c = torch.empty([batch_gpu, G.c_dim], device=device) # img = misc.print_module_summary(G, [z, c]) # misc.print_module_summary(D, [img, c]) pass # Setup augmentation. # if rank == 0: # print('Setting up augmentation...') augment_pipe = None ada_stats = None if (augment_kwargs is not None) and (augment_p > 0 or ada_target is not None): augment_pipe = dnnlib.util.construct_class_by_name(**augment_kwargs).train().requires_grad_(False).to( device) # subclass of torch.nn.Module augment_pipe.p.copy_(torch.as_tensor(augment_p)) if ada_target is not None: ada_stats = training_stats.Collector(regex='Loss/signs/real') # Distribute across GPUs. # if rank == 0: # print(f'Distributing across {num_gpus} GPUs...') ddp_modules = dict() for name, module in [('G_mapping', G.mapping), ('G_synthesis', G.synthesis), ('D', D), (None, G_ema), ('augment_pipe', augment_pipe)]: if (num_gpus > 1) and (module is not None) and len(list(module.parameters())) != 0: module.requires_grad_(True) module = torch.nn.parallel.DistributedDataParallel(module, device_ids=[device], broadcast_buffers=False) module.requires_grad_(False) if name is not None: ddp_modules[name] = module # Setup training phases. # if rank == 0: # print('Setting up training phases...') loss = dnnlib.util.construct_class_by_name(device=device, **ddp_modules, **loss_kwargs) # subclass of training.loss.Loss phases = [] for name, module, opt_kwargs, reg_interval in [('G', G, G_opt_kwargs, G_reg_interval), ('D', D, D_opt_kwargs, D_reg_interval)]: if reg_interval is None: opt = dnnlib.util.construct_class_by_name(params=module.parameters(), **opt_kwargs) # subclass of torch.optim.Optimizer phases += [dnnlib.EasyDict(name=name + 'both', module=module, opt=opt, interval=1)] else: # Lazy regularization. mb_ratio = reg_interval / (reg_interval + 1) opt_kwargs = dnnlib.EasyDict(opt_kwargs) opt_kwargs.lr = opt_kwargs.lr * mb_ratio opt_kwargs.betas = [beta ** mb_ratio for beta in opt_kwargs.betas] opt = dnnlib.util.construct_class_by_name(module.parameters(), **opt_kwargs) # subclass of torch.optim.Optimizer phases += [dnnlib.EasyDict(name=name + 'main', module=module, opt=opt, interval=1)] phases += [dnnlib.EasyDict(name=name + 'reg', module=module, opt=opt, interval=reg_interval)] for phase in phases: phase.start_event = None phase.end_event = None if rank == 0: phase.start_event = torch.cuda.Event(enable_timing=True) phase.end_event = torch.cuda.Event(enable_timing=True) # Export sample images. grid_size = None grid_z = None grid_c = None if rank == 0: # print('Exporting sample images...') grid_size, images, labels = setup_snapshot_image_grid(training_set=training_set) save_image_grid(images, os.path.join(run_dir, '_reals.' + ext), drange=[0, 255], grid_size=grid_size) grid_z = torch.randn([labels.shape[0], G.z_dim], device=device).split(batch_gpu) grid_c = torch.from_numpy(labels).to(device).split(batch_gpu) # Initialize logs. # if rank == 0: # print('Initializing logs...') stats_collector = training_stats.Collector(regex='.*') # Only upload data once, not for every process if rank == 0: url = f'https://mnr6yzqr22jgywm-adw2.adb.eu-frankfurt-1.oraclecloudapps.com/ords/thesisproject/sg_d/' \ f'report/{run_id}/{hostname}' metadata = training_set_kwargs more_metadata = [ {'run_dir': run_dir}, {'data_loader_kwargs': data_loader_kwargs}, {'G_kwargs': G_kwargs}, {'D_kwargs': D_kwargs}, {'G_opt_kwargs': G_opt_kwargs}, {'D_opt_kwargs': D_opt_kwargs}, {'augment_kwargs': augment_kwargs}, {'loss_kwargs': loss_kwargs}, {'savenames': savenames}, {'random_seed': random_seed}, {'num_gpus': num_gpus}, {'batch_size': batch_size}, {'batch_gpu': batch_gpu}, {'ema_kimg': ema_kimg}, {'ema_rampup': ema_rampup}, {'G_reg_interval': G_reg_interval}, {'D_reg_interval': D_reg_interval}, {'augment_p': augment_p}, {'ada_target': ada_target}, {'ada_interval': ada_interval}, {'ada_kimg': ada_kimg}, {'total_kimg': total_kimg}, {'kimg_per_tick': kimg_per_tick}, {'image_snapshot_ticks': image_snapshot_ticks}, {'network_snapshot_ticks': network_snapshot_ticks}, {'resume_pkl': resume_pkl}, {'resume_kimg': resume_kimg}, {'cudnn_benchmark': cudnn_benchmark} ] for d in more_metadata: metadata.update(d) requests.post(url, data=json.dumps(training_set_kwargs)) stats_jsonl = None # stats_tfevents = None if rank == 0: stats_jsonl = open(os.path.join(run_dir, 'stats.jsonl'), 'wt') # Train. if rank == 0: print(f'Training for {total_kimg} kimg...') print() cur_nimg = 0 cur_tick = 0 tick_start_nimg = cur_nimg tick_start_time = time.time() # maintenance_time = tick_start_time - start_time batch_idx = 0 if progress_fn is not None: progress_fn(0, total_kimg) while True: # Fetch training data. with torch.autograd.profiler.record_function('data_fetch'): phase_real_img, phase_real_c = next(training_set_iterator) phase_real_img = (phase_real_img.to(device).to(torch.float32) / 127.5 - 1).split(batch_gpu) phase_real_c = phase_real_c.to(device).split(batch_gpu) all_gen_z = torch.randn([len(phases) * batch_size, G.z_dim], device=device) all_gen_z = [phase_gen_z.split(batch_gpu) for phase_gen_z in all_gen_z.split(batch_size)] all_gen_c = [training_set.get_label(np.random.randint(len(training_set))) for _ in range(len(phases) * batch_size)] all_gen_c = torch.from_numpy(np.stack(all_gen_c)).pin_memory().to(device) all_gen_c = [phase_gen_c.split(batch_gpu) for phase_gen_c in all_gen_c.split(batch_size)] # Execute training phases. for phase, phase_gen_z, phase_gen_c in zip(phases, all_gen_z, all_gen_c): if batch_idx % phase.interval != 0: continue # Initialize gradient accumulation. if phase.start_event is not None: phase.start_event.record(torch.cuda.current_stream(device)) phase.opt.zero_grad(set_to_none=True) phase.module.requires_grad_(True) # Accumulate gradients over multiple rounds. for round_idx, (real_img, real_c, gen_z, gen_c) in enumerate( zip(phase_real_img, phase_real_c, phase_gen_z, phase_gen_c)): sync = (round_idx == batch_size // (batch_gpu * num_gpus) - 1) gain = phase.interval loss.accumulate_gradients(phase=phase.name, real_img=real_img, real_c=real_c, gen_z=gen_z, gen_c=gen_c, sync=sync, gain=gain) # Update weights. phase.module.requires_grad_(False) with torch.autograd.profiler.record_function(phase.name + '_opt'): for param in phase.module.parameters(): if param.grad is not None: misc.nan_to_num(param.grad, nan=0, posinf=1e5, neginf=-1e5, out=param.grad) phase.opt.step() if phase.end_event is not None: phase.end_event.record(torch.cuda.current_stream(device)) # Update G_ema. with torch.autograd.profiler.record_function('Gema'): ema_nimg = ema_kimg * 1000 if ema_rampup is not None: ema_nimg = min(ema_nimg, cur_nimg * ema_rampup) ema_beta = 0.5 ** (batch_size / max(ema_nimg, 1e-8)) for p_ema, p in zip(G_ema.parameters(), G.parameters()): p_ema.copy_(p.lerp(p_ema, ema_beta)) for b_ema, b in zip(G_ema.buffers(), G.buffers()): b_ema.copy_(b) # Update state. cur_nimg += batch_size batch_idx += 1 # Execute ADA heuristic. if (ada_stats is not None) and (batch_idx % ada_interval == 0): ada_stats.update() adjust = np.sign(ada_stats['Loss/signs/real'] - ada_target) * (batch_size * ada_interval) / ( ada_kimg * 1000) augment_pipe.p.copy_((augment_pipe.p + adjust).max(misc.constant(0, device=device))) # Perform maintenance tasks once per tick. done = (cur_nimg >= total_kimg * 1000) if (not done) and (cur_tick != 0) and (cur_nimg < tick_start_nimg + kimg_per_tick * 1000): continue tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0 tick_end_time = time.time() total_time = tick_end_time - start_time tick_time = tick_end_time - tick_start_time left_kimg = total_kimg - cur_nimg / 1000 left_sec = left_kimg * tick_time / tick_kimg finaltime = time.asctime(time.localtime(tick_end_time + left_sec)) msg_final = ' %ss left till %s ' % (shortime(left_sec), finaltime[11:16]) # Print status line, accumulating the same information in stats_collector. # tick_end_time = time.time() fields = [] fields += [f"tick {training_stats.report0('Progress/tick', cur_tick):<4d}"] fields += [f"kimg {training_stats.report0('Progress/kimg', cur_nimg / 1e3):<6.1f}"] fields += [f"time {dnnlib.util.format_time(training_stats.report0('Timing/total_sec', total_time)):<8s}"] fields += [msg_final] fields += [f"min/tick {training_stats.report0('Timing/sec_per_tick', tick_time / 60):<6.3g}"] fields += [f"sec/kimg {training_stats.report0('Timing/sec_per_kimg', tick_time / tick_kimg):<7.3g}"] fields += [f"cpumem {training_stats.report0('Resources/cpu_mem_gb', psutil.Process(os.getpid()).memory_info().rss / 2 ** 30):<4.1f}"] fields += [f"gpumem {training_stats.report0('Resources/peak_gpu_mem_gb', torch.cuda.max_memory_allocated(device) / 2 ** 30):<4.1f}"] torch.cuda.reset_peak_memory_stats() fields += [f"aug {training_stats.report0('Progress/augment', float(augment_pipe.p.cpu()) if augment_pipe is not None else 0):.3f}"] # !!! fields += ["lr %.2g" % training_stats.report0('Progress/G_lrate', G_opt_kwargs.lr)] fields += ["%.2g" % training_stats.report0('Progress/D_lrate', D_opt_kwargs.lr)] training_stats.report0('Timing/total_hours', total_time / (60 * 60)) training_stats.report0('Timing/total_days', total_time / (24 * 60 * 60)) if rank == 0: print(' '.join(fields)) # Check for abort. if (not done) and (abort_fn is not None) and abort_fn(): done = True if rank == 0: print() print('Aborting...') # Save image snapshot. if (rank == 0) and (image_snapshot_ticks is not None) and (done or cur_tick % image_snapshot_ticks == 0): images = torch.cat([G_ema(z=z, c=c, noise_mode='const').cpu() for z, c in zip(grid_z, grid_c)]).numpy() save_image_grid(images, os.path.join(run_dir, 'fake-%04d.%s' % (cur_nimg // 1000, ext)), drange=[-1, 1], grid_size=grid_size) # Save network snapshot. # snapshot_pkl = None if (network_snapshot_ticks is not None) and (done or cur_tick % network_snapshot_ticks == 0): snapshot_data = dict(training_set_kwargs=dict(training_set_kwargs)) compact_data = dict(training_set_kwargs=dict(training_set_kwargs)) for name, module in [('G', G), ('D', D), ('G_ema', G_ema), ('augment_pipe', augment_pipe)]: if module is not None: if num_gpus > 1: misc.check_ddp_consistency(module, ignore_regex=r'.*\.w_avg') module = copy.deepcopy(module).eval().requires_grad_(False).cpu() snapshot_data[name] = module if name == 'G_ema': compact_data[name] = module # !!! del module # conserve memory # !!! pkl_snap = os.path.join(run_dir, '%s-%04d.pkl' % (savenames[0], cur_nimg // 1000)) pkl_run = os.path.join(run_dir, '%s-%04d.pkl' % (savenames[1], cur_nimg // 1000)) if rank == 0: with open(pkl_snap, 'wb') as f: pickle.dump(snapshot_data, f) with open(pkl_run, 'wb') as f: pickle.dump(compact_data, f) # Evaluate metrics. ''' if (snapshot_data is not None) and (len(metrics) > 0): if rank == 0: print('Evaluating metrics...') for metric in metrics: result_dict = metric_main.calc_metric(metric=metric, G=snapshot_data['G_ema'], dataset_kwargs=training_set_kwargs, num_gpus=num_gpus, rank=rank, device=device) if rank == 0: metric_main.report_metric(result_dict, run_dir=run_dir, snapshot_pkl=snapshot_pkl) stats_metrics.update(result_dict.results) ''' try: del snapshot_data # conserve memory except UnboundLocalError: pass # !!! try: del compact_data # conserve memory except UnboundLocalError: pass # Collect statistics. for phase in phases: value = [] if (phase.start_event is not None) and (phase.end_event is not None): phase.end_event.synchronize() value = phase.start_event.elapsed_time(phase.end_event) training_stats.report0('Timing/' + phase.name, value) stats_collector.update() stats_dict = stats_collector.as_dict() # Report stats to database if rank == 0: url = f'https://mnr6yzqr22jgywm-adw2.adb.eu-frankfurt-1.oraclecloudapps.com/ords/thesisproject/sg2/' \ f'report/{run_id}/{hostname}' requests.post(url, data=json.dumps(stats_dict)) # Update logs. timestamp = time.time() if stats_jsonl is not None: fields = dict(stats_dict, timestamp=timestamp) json_data = json.dumps(fields) # Write stats to file stats_jsonl.write(json_data + '\n') stats_jsonl.flush() # if stats_tfevents is not None: # global_step = int(cur_nimg / 1e3) # walltime = timestamp - start_time # for name, value in stats_dict.items(): # stats_tfevents.add_scalar(name, value.mean, global_step=global_step, walltime=walltime) # for name, value in stats_metrics.items(): # stats_tfevents.add_scalar(f'Metrics/{name}', value, global_step=global_step, walltime=walltime) # stats_tfevents.flush() if progress_fn is not None: progress_fn(cur_nimg // 1000, total_kimg) # Update state. cur_tick += 1 tick_start_nimg = cur_nimg tick_start_time = time.time() # maintenance_time = tick_start_time - tick_end_time if done: break # Done. if rank == 0: print() print('Exiting...')