def register_gradients(self, loss: TfExpression, trainable_vars: Union[List, dict]) -> None:
    """Register the gradients of the given loss function with respect to the given variables. Intended to be called once per GPU.

    Args:
        loss: Scalar loss expression; gradients are registered on the device this loss lives on.
        trainable_vars: List of tf.Variables, or a dict (e.g. Network.trainables) whose values are used.
    """
    tfutil.assert_tf_initialized()
    assert not self._updates_applied  # no further registration once apply_updates() has been called
    device = self._get_device(loss.device)

    # Validate trainables.
    if isinstance(trainable_vars, dict):
        trainable_vars = list(trainable_vars.values())  # allow passing in Network.trainables as vars
    assert isinstance(trainable_vars, list) and len(trainable_vars) >= 1
    assert all(tfutil.is_tf_expression(expr) for expr in trainable_vars + [loss])
    assert all(var.device == device.name for var in trainable_vars)  # every var must live on the same device as the loss

    # Validate shapes.
    if self._gradient_shapes is None:
        # First registration pins the expected shapes; later calls (other GPUs) must match exactly.
        self._gradient_shapes = [var.shape.as_list() for var in trainable_vars]
    assert len(trainable_vars) == len(self._gradient_shapes)
    assert all(var.shape.as_list() == var_shape for var, var_shape in zip(trainable_vars, self._gradient_shapes))

    # Report memory usage if requested.
    deps = []
    if self._report_mem_usage:
        self._report_mem_usage = False  # report only once, for the first registered device
        try:
            with tf.name_scope(self.id + '_mem'), tflex.device(device.name), tf.control_dependencies([loss]):
                deps.append(autosummary.autosummary(self.id + "/mem_usage_gb", tf.contrib.memory_stats.BytesInUse() / 2**30))
        except tf.errors.NotFoundError:
            pass  # memory_stats op unavailable in this build/device; skip reporting silently

    # Compute gradients.
    with tf.name_scope(self.id + "_grad"), tflex.device(device.name), tf.control_dependencies(deps):
        loss = self.apply_loss_scaling(tf.cast(loss, tf.float32))
        gate = tf.train.Optimizer.GATE_NONE  # disable gating to reduce memory usage
        grad_list = device.optimizer.compute_gradients(loss=loss, var_list=trainable_vars, gate_gradients=gate)

    # Register gradients.
    for grad, var in grad_list:
        if var not in device.grad_raw:
            device.grad_raw[var] = []
        device.grad_raw[var].append(grad)
def autosummary(name: str, value: TfExpressionEx, passthru: TfExpressionEx = None, condition: TfExpressionEx = True) -> TfExpressionEx:
    """Create a new autosummary.

    Args:
        name:      Name to use in TensorBoard
        value:     TensorFlow expression or python value to track
        passthru:  Optionally return this TF node without modifications but tack
                   an autosummary update side-effect to this node.
        condition: Only consulted by the (currently unreachable) implementation below.

    Example use of the passthru mechanism:

    n = autosummary('l2loss', loss, passthru=n)

    This is a shorthand for the following code:

    with tf.control_dependencies([autosummary('l2loss', loss)]):
        n = tf.identity(n)
    """
    tf.logging.info('autosummary(%s, %s)', repr(name), repr(value))
    # TPU-fork shortcut: write the scalar via the TPU summary helper and return
    # immediately. NOTE(review): everything after this return is the original
    # GPU/placeholder implementation and is unreachable; `passthru` and
    # `condition` are ignored on this path -- confirm the bypass is intentional
    # before deleting the dead code.
    get_tpu_summary().scalar(name, value)
    return value

    tfutil.assert_tf_initialized()
    name_id = name.replace("/", "_")

    if tfutil.is_tf_expression(value):
        with tf.name_scope("summary_" + name_id), tflex.device(value.device):
            condition = tf.convert_to_tensor(condition, name='condition')
            update_op = tf.cond(condition, lambda: tf.group(_create_var(name, value)), tf.no_op)
            with tf.control_dependencies([update_op]):
                return tf.identity(value if passthru is None else passthru)
    else:  # python scalar or numpy array
        assert not tfutil.is_tf_expression(passthru)
        assert not tfutil.is_tf_expression(condition)
        if condition:
            if name not in _immediate:
                with tfutil.absolute_name_scope("Autosummary/" + name_id), tflex.device(None), tf.control_dependencies(None):
                    update_value = tf.placeholder(_dtype)
                    update_op = _create_var(name, update_value)
                    _immediate[name] = update_op, update_value
            update_op, update_value = _immediate[name]
            tfutil.run(update_op, {update_value: value})
        return value if passthru is None else passthru
def gen_images(latents, truncation_psi_val, outfile=None, display=False, labels=None, randomize_noise=False, is_validation=True, network=None, numpy=False):
    """Run a generator on a batch of latents and assemble the results into an image grid.

    Args:
        latents: NumPy array of latent vectors, shape [n, ...].
        truncation_psi_val: Truncation psi forwarded to network.run().
        outfile: Optional path; when given, the grid image is saved there (parent dirs created).
        display: When True, show the image inline via IPython.
        labels: Optional conditioning labels forwarded to network.run().
        randomize_noise: Forwarded to network.run().
        is_validation: Forwarded to network.run().
        network: Generator network to use; defaults to the module-level `Gs` when None.
        numpy: When True, return the raw [n, 3, H, W] array instead of the PIL image.
    """
    if outfile:
        Path(outfile).parent.mkdir(exist_ok=True, parents=True)
    if network is None:
        network = Gs  # module-level default generator
    n = latents.shape[0]
    grid_size = get_grid_size(n)
    drange_net = [-1, 1]  # dynamic range of the generator's output
    with tflex.device('/gpu:0'):
        result = network.run(latents, labels, truncation_psi_val=truncation_psi_val, is_validation=is_validation, randomize_noise=randomize_noise, minibatch_size=sched.minibatch_gpu)
        result = result[:, 0:3, :, :]  # keep only the first 3 (RGB) channels
    img = misc.convert_to_pil_image(misc.create_image_grid(result, grid_size), drange_net)
    if outfile is not None:
        img.save(outfile)
    if display:
        # Render through an in-memory PNG so IPython can display it inline.
        f = BytesIO()
        img.save(f, 'png')
        IPython.display.display(IPython.display.Image(data=f.getvalue()))
    return result if numpy else img
def init(session=None, num_channels=None, resolution=None, label_size=None):
    """Initialize TF, build the G/D/Gs networks, and return the session.

    Any of num_channels/resolution/label_size left as None is read from the
    corresponding environment variable (NUM_CHANNELS / RESOLUTION / LABEL_SIZE),
    which must then be set or a KeyError is raised.

    Side effects: populates tflex.G, tflex.Gs, tflex.Gs_finalize, tflex.D and
    runs the global variable initializer.
    """
    label_size = int(os.environ['LABEL_SIZE']) if label_size is None else label_size
    resolution = int(os.environ['RESOLUTION']) if resolution is None else resolution
    num_channels = int(os.environ['NUM_CHANNELS']) if num_channels is None else num_channels
    dnnlib.tflib.init_tf()
    session = tflex.get_session(session)
    pprint(session.list_devices())
    tflex.set_override_cores(tflex.get_cores())
    with tflex.device('/gpu:0'):
        tflex.G = tflib.Network('G', num_channels=num_channels, resolution=resolution, label_size=label_size, **G_args)
        tflex.G.print_layers()
        # clone2 returns the clone plus a deferred finalizer; run it immediately.
        tflex.Gs, tflex.Gs_finalize = tflex.G.clone2('Gs')
        tflex.Gs_finalize()
        tflex.D = tflib.Network('D', num_channels=num_channels, resolution=resolution, label_size=label_size, **D_args)
        tflex.D.print_layers()
    tflib.run(tf.global_variables_initializer())
    return session
def get_images(tags, seed=0, mu=0, sigma=0, truncation=None):
    """Generate a seeded batch of images with Gs and save a 1x1 grid to 'mammos.png'.

    Returns (result, img): the raw RGB array and the saved PIL image.

    NOTE(review): `tags`, `mu`, `sigma`, the local `rnd`, the built `Gs_kwargs`,
    and `final` are never used by the generation call below, and every row of
    `all_z` is seeded identically (all latents in the batch are the same) --
    confirm whether this is intentional.
    """
    print("Generating mammos...")
    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.output_transform = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
    Gs_kwargs.randomize_noise = False
    if truncation is not None:
        Gs_kwargs.truncation_psi = truncation
    rnd = np.random.RandomState(seed)
    all_seeds = [seed] * batch_size
    all_z = np.stack([np.random.RandomState(seed).randn(*tflex.Gs.input_shape[1:]) for seed in all_seeds])  # [minibatch, component]
    print(all_z.shape)
    drange_net = [-1, 1]  # dynamic range of the generator's output
    with tflex.device('/gpu:0'):
        result = tflex.Gs.run(all_z, None, is_validation=True, randomize_noise=False, minibatch_size=sched.minibatch_gpu)
        if result.shape[1] > 3:
            final = result[:, 3, :, :]  # 4th channel, if present
        else:
            final = None
        result = result[:, 0:3, :, :]  # keep only the RGB channels
    img = misc.convert_to_pil_image(misc.create_image_grid(result, (1, 1)), drange_net)
    img.save('mammos.png')
    return result, img
def _evaluate(self, Gs, Gs_kwargs, num_gpus):
    """Compute kNN precision/recall between cached real-image VGG features and
    features of freshly generated samples; reports via _report_result()."""
    minibatch_size = num_gpus * self.minibatch_per_gpu
    feature_net = misc.load_pkl('https://drive.google.com/uc?id=1MzY4MFpZzE-mNS26pzhYlWN-4vMm2ytu', 'vgg16.pkl')

    # Calculate features for reals.
    cache_file = self._get_cache_file_for_reals(num_images=self.num_images)
    os.makedirs(os.path.dirname(cache_file), exist_ok=True)
    if os.path.isfile(cache_file):
        ref_features = misc.load_pkl(cache_file)  # reuse previously computed real features
    else:
        ref_features = np.empty([self.num_images, feature_net.output_shape[1]], dtype=np.float32)
        for idx, images in enumerate(self._iterate_reals(minibatch_size=minibatch_size)):
            begin = idx * minibatch_size
            end = min(begin + minibatch_size, self.num_images)
            # Last batch may be truncated to exactly num_images.
            ref_features[begin:end] = feature_net.run(images[:end - begin], num_gpus=num_gpus, assume_frozen=True)
            if end == self.num_images:
                break
        misc.save_pkl(ref_features, cache_file)

    # Construct TensorFlow graph: one generator + feature-net clone per GPU.
    result_expr = []
    for gpu_idx in range(num_gpus):
        with tflex.device('/gpu:%d' % gpu_idx):
            Gs_clone = Gs.clone()
            feature_net_clone = feature_net.clone()
            latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
            labels = self._get_random_labels_tf(self.minibatch_per_gpu)
            images = Gs_clone.get_output_for(latents, labels, **Gs_kwargs)
            images = tflib.convert_images_to_uint8(images)
            result_expr.append(feature_net_clone.get_output_for(images))

    # Calculate features for fakes.
    eval_features = np.empty([self.num_images, feature_net.output_shape[1]], dtype=np.float32)
    for begin in range(0, self.num_images, minibatch_size):
        self._report_progress(begin, self.num_images)
        end = min(begin + minibatch_size, self.num_images)
        eval_features[begin:end] = np.concatenate(tflib.run(result_expr), axis=0)[:end - begin]

    # Calculate precision and recall.
    # NOTE(review): col_batch_size reuses self.row_batch_size -- confirm a
    # separate self.col_batch_size isn't intended here.
    state = knn_precision_recall_features(ref_features=ref_features, eval_features=eval_features, feature_net=feature_net, nhood_sizes=[self.nhood_size], row_batch_size=self.row_batch_size, col_batch_size=self.row_batch_size, num_gpus=num_gpus)
    self._report_result(state.knn_precision[0], suffix='_precision')
    self._report_result(state.knn_recall[0], suffix='_recall')
def __init__(self, num_features, num_gpus):
    """Build the TF graph computing pairwise distances between two feature batches,
    with the second batch sharded evenly across the available GPUs."""
    self.num_features = num_features
    self.num_gpus = num_gpus

    # Placeholders live on the CPU; each GPU computes distances against its own
    # shard of batch 2, and the per-GPU results are concatenated column-wise.
    with tflex.device('/cpu:0'):
        feature_shape = [None, self.num_features]
        self._features_batch1 = tf.placeholder(tf.float16, shape=feature_shape)
        self._features_batch2 = tf.placeholder(tf.float16, shape=feature_shape)
        shards = tf.split(self._features_batch2, self.num_gpus, axis=0)
        per_gpu_distances = []
        for idx, shard in enumerate(shards):
            with tflex.device('/gpu:%d' % idx):
                per_gpu_distances.append(batch_pairwise_distances(self._features_batch1, shard))
        self._distance_block = tf.concat(per_gpu_distances, axis=1)
def get_random_labels_tf(self, minibatch_size): # => labels
    """Return a TF expression yielding `minibatch_size` uniformly sampled labels,
    or an empty [N, 0] tensor when the dataset has no labels."""
    with tf.name_scope('Dataset'):
        if self.label_size <= 0:
            # Unlabeled dataset: empty label tensor.
            return tf.zeros([minibatch_size, 0], self.label_dtype)
        with tflex.device('/cpu:0'):
            indices = tf.random_uniform([minibatch_size], 0, self._np_labels.shape[0], dtype=tf.int32)
            return tf.gather(self._tf_labels_var, indices)
def setup_weight_histograms(self, title: str = None) -> None:
    """Construct summary ops to include histograms of all trainable parameters in TensorBoard."""
    title = self.name if title is None else title
    # Escape name scope / device / control-dependency context so the summaries
    # attach at the top level of the graph.
    with tf.name_scope(None), tflex.device(None), tf.control_dependencies(None):
        for local_name, var in self.trainables.items():
            if "/" in local_name:
                # "scope/.../leaf" => "<title>_<leaf>/scope_..."
                *scopes, leaf = local_name.split("/")
                summary_name = "%s_%s/%s" % (title, leaf, "_".join(scopes))
            else:
                summary_name = title + "_toplevel/" + local_name
            tf.summary.histogram(summary_name, var)
def _get_device(self, device_name: str):
    """Get internal state for the given TensorFlow device, creating it on first use.

    The underlying optimizer object is shared across all Optimizer instances via
    self._shared_optimizers, keyed by device name.
    """
    tfutil.assert_tf_initialized()
    if device_name in self._devices:
        return self._devices[device_name]  # already initialized

    # Initialize fields.
    device = util.EasyDict()
    device.name = device_name
    device.optimizer = None  # Underlying optimizer: optimizer_class
    device.loss_scaling_var = None  # Log2 of loss scaling: tf.Variable
    device.grad_raw = OrderedDict()  # Raw gradients: var => [grad, ...]
    device.grad_clean = OrderedDict()  # Clean gradients: var => grad
    device.grad_acc_vars = OrderedDict()  # Accumulation sums: var => tf.Variable
    device.grad_acc_count = None  # Accumulation counter: tf.Variable
    device.grad_acc = OrderedDict()  # Accumulated gradients: var => grad

    # Setup TensorFlow objects.
    with tfutil.absolute_name_scope(self.scope + "/Devices"), tflex.device(device_name), tf.control_dependencies(None):
        if device_name not in self._shared_optimizers:
            optimizer_name = self.scope.replace("/", "_") + "_opt%d" % len(self._shared_optimizers)
            self._shared_optimizers[device_name] = self.optimizer_class(name=optimizer_name, learning_rate=self.learning_rate, **self.optimizer_kwargs)
            # On TPU replicas (or when explicitly requested), wrap the optimizer
            # so gradients are averaged across shards.
            if self._cross_shard or 'TPU_REPLICATED_CORE' in device_name:
                print('Using cross-shard optimizer for %s' % device_name)
                self._shared_optimizers[device_name] = tf.contrib.tpu.CrossShardOptimizer(self._shared_optimizers[device_name])
        device.optimizer = self._shared_optimizers[device_name]
        if self.use_loss_scaling:
            device.loss_scaling_var = tf.Variable(np.float32(self.loss_scaling_init), trainable=False, name="loss_scaling_var")

    # Register device.
    self._devices[device_name] = device
    return device
def _evaluate(self, Gs, Gs_kwargs, num_gpus):
    """Compute Frechet Inception Distance (FID) between cached real-image
    statistics and freshly generated samples; reports via _report_result()."""
    minibatch_size = num_gpus * self.minibatch_per_gpu
    inception = misc.load_pkl('https://drive.google.com/uc?id=1MzTY44rLToO5APn8TZmfR7_ENSe5aZUn', 'inception_v3_features.pkl')
    activations = np.empty([self.num_images, inception.output_shape[1]], dtype=np.float32)

    # Calculate statistics for reals.
    cache_file = self._get_cache_file_for_reals(num_images=self.num_images)
    os.makedirs(os.path.dirname(cache_file), exist_ok=True)
    if os.path.isfile(cache_file):
        mu_real, sigma_real = misc.load_pkl(cache_file)  # reuse cached mean/covariance
    else:
        for idx, images in enumerate(self._iterate_reals(minibatch_size=minibatch_size)):
            begin = idx * minibatch_size
            end = min(begin + minibatch_size, self.num_images)
            # Last batch may be truncated to exactly num_images.
            activations[begin:end] = inception.run(images[:end-begin], num_gpus=num_gpus, assume_frozen=True)
            if end == self.num_images:
                break
        mu_real = np.mean(activations, axis=0)
        sigma_real = np.cov(activations, rowvar=False)
        misc.save_pkl((mu_real, sigma_real), cache_file)

    # Construct TensorFlow graph: one generator + inception clone per GPU.
    result_expr = []
    for gpu_idx in range(num_gpus):
        with tflex.device('/gpu:%d' % gpu_idx):
            Gs_clone = Gs.clone()
            inception_clone = inception.clone()
            latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
            labels = self._get_random_labels_tf(self.minibatch_per_gpu)
            images = Gs_clone.get_output_for(latents, labels, **Gs_kwargs)
            images = tflib.convert_images_to_uint8(images)
            result_expr.append(inception_clone.get_output_for(images))

    # Calculate statistics for fakes.
    for begin in range(0, self.num_images, minibatch_size):
        self._report_progress(begin, self.num_images)
        end = min(begin + minibatch_size, self.num_images)
        activations[begin:end] = np.concatenate(tflib.run(result_expr), axis=0)[:end-begin]
    mu_fake = np.mean(activations, axis=0)
    sigma_fake = np.cov(activations, rowvar=False)

    # Calculate FID: ||mu_f - mu_r||^2 + Tr(S_f + S_r - 2*sqrt(S_f S_r)).
    m = np.square(mu_fake - mu_real).sum()
    s, _ = scipy.linalg.sqrtm(np.dot(sigma_fake, sigma_real), disp=False)  # pylint: disable=no-member
    dist = m + np.trace(sigma_fake + sigma_real - 2*s)
    self._report_result(np.real(dist))  # sqrtm may return tiny imaginary parts; keep the real component
def save_summaries(file_writer, global_step=None):
    """Call FileWriter.add_summary() with all summaries in the default graph,
    automatically finalizing and merging them on the first call.

    NOTE(review): the bare `return` below makes this entire function a no-op --
    everything after it is dead code. This looks like a deliberate TPU-fork
    disable (cf. autosummary()'s early return into get_tpu_summary()), but
    confirm before either removing the return or deleting the dead code.
    """
    return
    global _merge_op
    tfutil.assert_tf_initialized()

    if _merge_op is None:
        # First call: flush autosummary layout and build the merged summary op.
        layout = finalize_autosummaries()
        if layout is not None:
            file_writer.add_summary(layout)
        with tflex.device(None), tf.control_dependencies(None):
            _merge_op = tf.summary.merge_all()

    if _merge_op is not None:
        file_writer.add_summary(_merge_op.eval(), global_step)
def finalize():
    """Build the TF input pipeline (closure over the enclosing configure call's
    batch_size / buffer_mb / max_images / num_threads / shuffle_mb / repeat /
    prefetch_mb arguments and `self`)."""
    # Build TF expressions.
    with tf.name_scope('Dataset'), tflex.device('/cpu:0'):
        # Fixed batch size => bake it into the graph; batch_size=None => feed it
        # at run time via a placeholder.
        # BUG FIX: the original condition `batch_size is not None or batch_size <= 0`
        # evaluated `None <= 0` (a TypeError on Python 3) whenever batch_size was
        # None; every non-None value short-circuited to `batch_size`, so this
        # simplified condition preserves all previously working paths.
        self._tf_minibatch_in = batch_size if batch_size is not None else tf.placeholder(tf.int64, name='minibatch_in', shape=[])
        self._tf_labels_var, self._tf_labels_init = tflib.create_var_with_large_initial_value2(self._np_labels, name='labels_var')
        with tf.control_dependencies([self._tf_labels_init]):
            self._tf_labels_dataset = tf.data.Dataset.from_tensor_slices(self._tf_labels_var)
        for tfr_file, tfr_shape, tfr_lod in self.tfr:
            if tfr_lod < 0:
                continue  # skip records without a valid level-of-detail
            dset = tf.data.TFRecordDataset(tfr_file, compression_type='', buffer_size=buffer_mb << 20)
            if max_images is not None:
                dset = dset.take(max_images)
            dset = dset.map(self.parse_tfrecord_tf, num_parallel_calls=num_threads)
            dset = tf.data.Dataset.zip((dset, self._tf_labels_dataset))
            # Size shuffle/prefetch buffers in items so they fit the requested MB budget.
            bytes_per_item = np.prod(tfr_shape) * np.dtype(self.dtype).itemsize
            if shuffle_mb > 0:
                dset = dset.shuffle(((shuffle_mb << 20) - 1) // bytes_per_item + 1)
            if repeat:
                dset = dset.repeat()
            if prefetch_mb > 0:
                dset = dset.prefetch(((prefetch_mb << 20) - 1) // bytes_per_item + 1)
            if batch_size is None or batch_size > 0:
                dset = dset.batch(self._tf_minibatch_in)
            self._tf_datasets[tfr_lod] = dset
        # One shared iterator reinitializable across all LODs.
        self._tf_iterator = tf.data.Iterator.from_structure(self._tf_datasets[0].output_types, self._tf_datasets[0].output_shapes)
        self._tf_init_ops = {lod: self._tf_iterator.make_initializer(dset) if batch_size is None else tf.no_op() for lod, dset in self._tf_datasets.items()}
def _evaluate(self, Gs, Gs_kwargs, num_gpus):
    """Compute Inception Score (mean and std over num_splits splits) for
    generated samples; reports via _report_result()."""
    minibatch_size = num_gpus * self.minibatch_per_gpu
    inception = misc.load_pkl('https://drive.google.com/uc?id=1Mz9zQnIrusm3duZB91ng_aUIePFNI6Jx', 'inception_v3_softmax.pkl')
    activations = np.empty([self.num_images, inception.output_shape[1]], dtype=np.float32)

    # Construct TensorFlow graph: one generator + inception clone per GPU.
    result_expr = []
    for gpu_idx in range(num_gpus):
        with tflex.device('/gpu:%d' % gpu_idx):
            Gs_clone = Gs.clone()
            inception_clone = inception.clone()
            latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
            labels = self._get_random_labels_tf(self.minibatch_per_gpu)
            images = Gs_clone.get_output_for(latents, labels, **Gs_kwargs)
            images = tflib.convert_images_to_uint8(images)
            result_expr.append(inception_clone.get_output_for(images))

    # Calculate activations for fakes.
    for begin in range(0, self.num_images, minibatch_size):
        self._report_progress(begin, self.num_images)
        end = min(begin + minibatch_size, self.num_images)
        activations[begin:end] = np.concatenate(tflib.run(result_expr), axis=0)[:end - begin]

    # Calculate IS: exp of mean KL(p(y|x) || p(y)) within each split.
    scores = []
    for i in range(self.num_splits):
        part = activations[i * self.num_images // self.num_splits:(i + 1) * self.num_images // self.num_splits]
        kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))
        kl = np.mean(np.sum(kl, 1))
        scores.append(np.exp(kl))
    self._report_result(np.mean(scores), suffix='_mean')
    self._report_result(np.std(scores), suffix='_std')
def _evaluate(self, Gs, Gs_kwargs, num_gpus):
    """Compute Perceptual Path Length (PPL): the LPIPS distance between images
    synthesized from latents an epsilon apart, averaged after outlier rejection;
    reports via _report_result()."""
    Gs_kwargs = dict(Gs_kwargs)
    Gs_kwargs.update(self.Gs_overrides)
    minibatch_size = num_gpus * self.minibatch_per_gpu

    # Construct TensorFlow graph.
    distance_expr = []
    for gpu_idx in range(num_gpus):
        with tflex.device('/gpu:%d' % gpu_idx):
            Gs_clone = Gs.clone()
            noise_vars = [var for name, var in Gs_clone.components.synthesis.vars.items() if name.startswith('noise')]

            # Generate random latents and interpolation t-values.
            # Latents come in consecutive pairs (endpoints of each interpolation).
            lat_t01 = tf.random_normal([self.minibatch_per_gpu * 2] + Gs_clone.input_shape[1:])
            # sampling == 'end' pins t to 0; 'full' draws t uniformly from [0, 1).
            lerp_t = tf.random_uniform([self.minibatch_per_gpu], 0.0, 1.0 if self.sampling == 'full' else 0.0)
            labels = tf.reshape(tf.tile(self._get_random_labels_tf(self.minibatch_per_gpu), [1, 2]), [self.minibatch_per_gpu * 2, -1])

            # Interpolate in W or Z.
            if self.space == 'w':
                # Map first, then lerp in W.
                dlat_t01 = Gs_clone.components.mapping.get_output_for(lat_t01, labels, **Gs_kwargs)
                dlat_t01 = tf.cast(dlat_t01, tf.float32)
                dlat_t0, dlat_t1 = dlat_t01[0::2], dlat_t01[1::2]
                dlat_e0 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis])
                dlat_e1 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis] + self.epsilon)
                dlat_e01 = tf.reshape(tf.stack([dlat_e0, dlat_e1], axis=1), dlat_t01.shape)
            else:  # space == 'z'
                # Slerp in Z, then map both endpoints.
                lat_t0, lat_t1 = lat_t01[0::2], lat_t01[1::2]
                lat_e0 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis])
                lat_e1 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis] + self.epsilon)
                lat_e01 = tf.reshape(tf.stack([lat_e0, lat_e1], axis=1), lat_t01.shape)
                dlat_e01 = Gs_clone.components.mapping.get_output_for(lat_e01, labels, **Gs_kwargs)

            # Synthesize images.
            with tf.control_dependencies([var.initializer for var in noise_vars]):  # use same noise inputs for the entire minibatch
                images = Gs_clone.components.synthesis.get_output_for(dlat_e01, randomize_noise=False, **Gs_kwargs)
                images = tf.cast(images, tf.float32)

            # Crop only the face region.
            if self.crop:
                c = int(images.shape[2] // 8)
                images = images[:, :, c*3 : c*7, c*2 : c*6]

            # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
            factor = images.shape[2] // 256
            if factor > 1:
                images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor])
                images = tf.reduce_mean(images, axis=[3,5])

            # Scale dynamic range from [-1,1] to [0,255] for VGG.
            images = (images + 1) * (255 / 2)

            # Evaluate perceptual distance, normalized by epsilon^2.
            img_e0, img_e1 = images[0::2], images[1::2]
            distance_measure = misc.load_pkl('https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2', 'vgg16_zhang_perceptual.pkl')
            distance_expr.append(distance_measure.get_output_for(img_e0, img_e1) * (1 / self.epsilon**2))

    # Sampling loop.
    all_distances = []
    for begin in range(0, self.num_samples, minibatch_size):
        self._report_progress(begin, self.num_samples)
        all_distances += tflib.run(distance_expr)
    all_distances = np.concatenate(all_distances, axis=0)

    # Reject outliers outside the 1st..99th percentile band.
    lo = np.percentile(all_distances, 1, interpolation='lower')
    hi = np.percentile(all_distances, 99, interpolation='higher')
    filtered_distances = np.extract(np.logical_and(lo <= all_distances, all_distances <= hi), all_distances)
    self._report_result(np.mean(filtered_distances))
def apply_updates(self, allow_no_op: bool = False) -> tf.Operation:
    """Construct training op to update the registered variables based on their gradients.

    NOTE(review): despite the annotation, this returns a tuple
    (training_op, finalize_callable); callers must invoke finalize() once to
    initialize optimizer/loss-scaling/accumulator variables.
    """
    tfutil.assert_tf_initialized()
    assert not self._updates_applied  # may only be called once
    self._updates_applied = True
    all_ops = []

    # Check for no-op.
    if allow_no_op and len(self._devices) == 0:
        with tfutil.absolute_name_scope(self.scope):
            return tf.no_op(name='TrainingOp')

    # Clean up gradients.
    for device_idx, device in enumerate(self._devices.values()):
        with tfutil.absolute_name_scope(self.scope + "/Clean%d" % device_idx), tflex.device(device.name):
            for var, grad in device.grad_raw.items():
                # Filter out disconnected gradients and convert to float32.
                grad = [g for g in grad if g is not None]
                grad = [tf.cast(g, tf.float32) for g in grad]

                # Sum within the device.
                if len(grad) == 0:
                    grad = tf.zeros(var.shape)  # No gradients => zero.
                elif len(grad) == 1:
                    grad = grad[0]  # Single gradient => use as is.
                else:
                    grad = tf.add_n(grad)  # Multiple gradients => sum.

                # Scale as needed: average over registrations and devices,
                # undo any loss scaling that was applied in register_gradients().
                scale = 1.0 / len(device.grad_raw[var]) / len(self._devices)
                scale = tf.constant(scale, dtype=tf.float32, name="scale")
                if self.minibatch_multiplier is not None:
                    scale /= tf.cast(self.minibatch_multiplier, tf.float32)
                scale = self.undo_loss_scaling(scale)
                device.grad_clean[var] = grad * scale

    # Sum gradients across devices.
    if len(self._devices) > 1:
        with tfutil.absolute_name_scope(self.scope + "/Broadcast"), tflex.device(None):
            for all_vars in zip(*[device.grad_clean.keys() for device in self._devices.values()]):
                if len(all_vars) > 0 and all(dim > 0 for dim in all_vars[0].shape.as_list()):  # NCCL does not support zero-sized tensors.
                    all_grads = [device.grad_clean[var] for device, var in zip(self._devices.values(), all_vars)]
                    all_grads = all_sum(self._devices, all_grads)
                    for device, var, grad in zip(self._devices.values(), all_vars, all_grads):
                        device.grad_clean[var] = grad

    # Apply updates separately on each device.
    for device_idx, device in enumerate(self._devices.values()):
        with tfutil.absolute_name_scope(self.scope + "/Apply%d" % device_idx), tflex.device(device.name):
            # pylint: disable=cell-var-from-loop

            # Accumulate gradients over time.
            if self.minibatch_multiplier is None:
                # No accumulation: apply the cleaned gradients directly.
                acc_ok = tf.constant(True, name='acc_ok')
                device.grad_acc = OrderedDict(device.grad_clean)
            else:
                # Create variables (outside any control-dependency context).
                with tf.control_dependencies(None):
                    for var in device.grad_clean.keys():
                        device.grad_acc_vars[var] = tf.Variable(tf.zeros(var.shape), trainable=False, name="grad_acc_var")
                    device.grad_acc_count = tf.Variable(tf.zeros([]), trainable=False, name="grad_acc_count")

                # Track counter: reset once enough minibatches have accumulated.
                count_cur = device.grad_acc_count + 1.0
                count_inc_op = lambda: tf.assign(device.grad_acc_count, count_cur)
                count_reset_op = lambda: tf.assign(device.grad_acc_count, tf.zeros([]))
                acc_ok = (count_cur >= tf.cast(self.minibatch_multiplier, tf.float32))
                all_ops.append(tf.cond(acc_ok, count_reset_op, count_inc_op))

                # Track gradients: accumulate into the vars, reset after applying.
                for var, grad in device.grad_clean.items():
                    acc_var = device.grad_acc_vars[var]
                    acc_cur = acc_var + grad
                    device.grad_acc[var] = acc_cur
                    with tf.control_dependencies([acc_cur]):
                        acc_inc_op = lambda: tf.assign(acc_var, acc_cur)
                        acc_reset_op = lambda: tf.assign(acc_var, tf.zeros(var.shape))
                        all_ops.append(tf.cond(acc_ok, acc_reset_op, acc_inc_op))

            # No overflow => apply gradients. Skips the update entirely if any
            # accumulated gradient contains NaN/Inf (loss-scaling overflow).
            all_ok = tf.reduce_all(tf.stack([acc_ok] + [tf.reduce_all(tf.is_finite(g)) for g in device.grad_acc.values()]))
            apply_op = lambda: device.optimizer.apply_gradients([(tf.cast(grad, var.dtype), var) for var, grad in device.grad_acc.items()])
            all_ops.append(tf.cond(all_ok, apply_op, tf.no_op))

            # Adjust loss scaling: grow on success, shrink on overflow.
            if self.use_loss_scaling:
                ls_inc_op = lambda: tf.assign_add(device.loss_scaling_var, self.loss_scaling_inc)
                ls_dec_op = lambda: tf.assign_sub(device.loss_scaling_var, self.loss_scaling_dec)
                ls_update_op = lambda: tf.group(tf.cond(all_ok, ls_inc_op, ls_dec_op))
                all_ops.append(tf.cond(acc_ok, ls_update_op, tf.no_op))

            # Last device => report statistics.
            if device_idx == len(self._devices) - 1:
                all_ops.append(autosummary.autosummary(self.id + "/learning_rate", self.learning_rate))
                all_ops.append(autosummary.autosummary(self.id + "/overflow_frequency", tf.where(all_ok, 0, 1), condition=acc_ok))
                if self.use_loss_scaling:
                    all_ops.append(autosummary.autosummary(self.id + "/loss_scaling_log2", device.loss_scaling_var))

    def finalize():
        # Initialize variables.
        self.reset_optimizer_state()
        if self.use_loss_scaling:
            tfutil.init_uninitialized_vars([device.loss_scaling_var for device in self._devices.values()])
        if self.minibatch_multiplier is not None:
            tfutil.run([var.initializer for device in self._devices.values() for var in list(device.grad_acc_vars.values()) + [device.grad_acc_count]])

    # Group everything into a single op.
    with tfutil.absolute_name_scope(self.scope):
        return tf.group(*all_ops, name="TrainingOp"), finalize
def run(self,
        *in_arrays: Tuple[Union[np.ndarray, None], ...],
        input_transform: dict = None,
        output_transform: dict = None,
        return_as_list: bool = False,
        print_progress: bool = False,
        minibatch_size: int = None,
        num_gpus: int = 1,
        assume_frozen: bool = False,
        **dynamic_kwargs) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]:
    """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s).

    Args:
        input_transform:    A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network.
                            The dict must contain a 'func' field that points to a top-level function. The function is called with the input
                            TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
        output_transform:   A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network.
                            The dict must contain a 'func' field that points to a top-level function. The function is called with the output
                            TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
        return_as_list:     True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs.
        print_progress:     Print progress to the console? Useful for very large input arrays.
        minibatch_size:     Maximum minibatch size to use, None = disable batching.
        num_gpus:           Number of GPUs to use.
        assume_frozen:      Improve multi-GPU performance by assuming that the trainable parameters will remain changed between calls.
        dynamic_kwargs:     Additional keyword arguments to be passed into the network build function.
    """
    assert len(in_arrays) == self.num_inputs
    assert not all(arr is None for arr in in_arrays)
    assert input_transform is None or util.is_top_level_function(input_transform["func"])
    assert output_transform is None or util.is_top_level_function(output_transform["func"])
    output_transform, dynamic_kwargs = _handle_legacy_output_transforms(output_transform, dynamic_kwargs)
    num_items = in_arrays[0].shape[0]
    if minibatch_size is None:
        minibatch_size = num_items  # no batching: one pass over everything

    # Construct unique hash key from all arguments that affect the TensorFlow graph.
    key = dict(input_transform=input_transform, output_transform=output_transform, num_gpus=num_gpus, assume_frozen=assume_frozen, dynamic_kwargs=dynamic_kwargs)

    def unwind_key(obj):
        # Recursively convert the key dict into a deterministic, repr-able form.
        if isinstance(obj, dict):
            return [(key, unwind_key(value)) for key, value in sorted(obj.items())]
        if callable(obj):
            return util.get_top_level_function_name(obj)
        return obj

    key = repr(unwind_key(key))

    # Build graph (only on cache miss; identical argument combinations reuse the cached expressions).
    if key not in self._run_cache:
        with tfutil.absolute_name_scope(self.scope + "/_Run"), tf.control_dependencies(None):
            with tflex.device("/cpu:0"):
                in_expr = [tf.placeholder(tf.float32, name=name) for name in self.input_names]
                in_split = list(zip(*[tf.split(x, num_gpus) for x in in_expr]))  # per-GPU input shards
            out_split = []
            for gpu in range(num_gpus):
                with tflex.device("/gpu:%d" % gpu):
                    net_gpu = self.clone() if assume_frozen else self
                    in_gpu = in_split[gpu]
                    if input_transform is not None:
                        in_kwargs = dict(input_transform)
                        in_gpu = in_kwargs.pop("func")(*in_gpu, **in_kwargs)
                        in_gpu = [in_gpu] if tfutil.is_tf_expression(in_gpu) else list(in_gpu)
                    assert len(in_gpu) == self.num_inputs
                    out_gpu = net_gpu.get_output_for(*in_gpu, return_as_list=True, **dynamic_kwargs)
                    if output_transform is not None:
                        out_kwargs = dict(output_transform)
                        out_gpu = out_kwargs.pop("func")(*out_gpu, **out_kwargs)
                        out_gpu = [out_gpu] if tfutil.is_tf_expression(out_gpu) else list(out_gpu)
                    assert len(out_gpu) == self.num_outputs
                    out_split.append(out_gpu)
            with tflex.device("/cpu:0"):
                # Re-join the per-GPU output shards along the batch axis.
                out_expr = [tf.concat(outputs, axis=0) for outputs in zip(*out_split)]
                self._run_cache[key] = in_expr, out_expr

    # Run minibatches.
    in_expr, out_expr = self._run_cache[key]
    out_arrays = [np.empty([num_items] + expr.shape.as_list()[1:], expr.dtype.name) for expr in out_expr]
    for mb_begin in range(0, num_items, minibatch_size):
        if print_progress:
            print("\r%d / %d" % (mb_begin, num_items), end="")
        mb_end = min(mb_begin + minibatch_size, num_items)
        mb_num = mb_end - mb_begin
        # None inputs are fed as zeros of the declared shape.
        mb_in = [src[mb_begin:mb_end] if src is not None else np.zeros([mb_num] + shape[1:]) for src, shape in zip(in_arrays, self.input_shapes)]
        mb_out = tf.get_default_session().run(out_expr, dict(zip(in_expr, mb_in)))
        for dst, src in zip(out_arrays, mb_out):
            dst[mb_begin:mb_end] = src

    # Done.
    if print_progress:
        print("\r%d / %d" % (num_items, num_items))
    if not return_as_list:
        out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(out_arrays)
    return out_arrays
# Training/run configuration defaults for StyleGAN2.
train = EasyDict(run_func_name='training.training_loop.training_loop')  # Options for training loop.
G_args = EasyDict(func_name='training.networks_stylegan2.G_main')  # Options for generator network.
D_args = EasyDict(func_name='training.networks_stylegan2.D_stylegan2')  # Options for discriminator network.
G_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8)  # Options for generator optimizer.
D_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8)  # Options for discriminator optimizer.
G_loss = EasyDict(func_name='training.loss.G_logistic_ns_pathreg')  # Options for generator loss.
D_loss = EasyDict(func_name='training.loss.D_logistic_r1')  # Options for discriminator loss.
sched = EasyDict()  # Options for TrainingSchedule.
grid = EasyDict(size='8k', layout='random')  # Options for setup_snapshot_image_grid().
sc = dnnlib.SubmitConfig()  # Options for dnnlib.submit_run().
tf_config = {'rnd.np_random_seed': 1000}
label_dtype = np.int64
sched.minibatch_gpu = 1

# Build the networks only once per process; re-running this module/cell reuses
# the existing globals.
if 'G' not in globals():
    with tflex.device('/gpu:0'):
        G = tflib.Network('G', num_channels=num_channels, resolution=resolution, label_size=label_size, fmap_base=fmap_base, **G_args)
        G.print_layers()
        Gs, Gs_finalize = G.clone2('Gs')
        Gs_finalize()
        D = tflib.Network('D', num_channels=num_channels, resolution=resolution, label_size=label_size, fmap_base=fmap_base, **D_args)
        D.print_layers()


def rand_latent(n, seed=None):
    """Return `n` random latent vectors shaped for G, optionally reproducibly seeded.

    A negative seed is folded into NumPy's valid [0, 2**32) seed range. After a
    seeded draw, the global NumPy RNG is re-seeded from entropy so later callers
    are not left on a deterministic stream.
    """
    if seed is not None:
        if seed < 0:
            # BUG FIX: original used `2*32 - seed` (i.e. 64 - seed) -- almost
            # certainly a typo for a power of two, and it collides with small
            # positive seeds. Fold negatives into the valid range instead.
            seed = seed % (2**32)
        np.random.seed(seed)
    result = np.random.randn(n, *G.input_shape[1:])
    if seed is not None:
        np.random.seed()
    return result  # BUG FIX: original fell off the end and returned None
def finalize_autosummaries() -> None:
    """Create the necessary ops to include autosummaries in TensorBoard report.
    Note: This should be done only once per graph.

    NOTE(review): despite the annotation, this returns either None or the
    custom-scalars layout summary when enable_custom_scalars is set.
    """
    global _finalized
    tfutil.assert_tf_initialized()

    if _finalized:
        return None  # already finalized for this graph

    _finalized = True
    tfutil.init_uninitialized_vars([var for vars_list in _vars.values() for var in vars_list])

    # Create summary ops.
    with tflex.device(None), tf.control_dependencies(None):
        for name, vars_list in _vars.items():
            name_id = name.replace("/", "_")
            with tfutil.absolute_name_scope("Autosummary/" + name_id):
                moments = tf.add_n(vars_list)
                moments /= moments[0]  # normalize by accumulated count => [1, mean, mean-of-squares]
                with tf.control_dependencies([moments]):  # read before resetting
                    reset_ops = [tf.assign(var, tf.zeros(3, dtype=_dtype)) for var in vars_list]
                    with tf.name_scope(None), tf.control_dependencies(reset_ops):  # reset before reporting
                        mean = moments[1]
                        std = tf.sqrt(moments[2] - tf.square(moments[1]))
                        tf.summary.scalar(name, mean)
                        if enable_custom_scalars:
                            tf.summary.scalar("xCustomScalars/" + name + "/margin_lo", mean - std)
                            tf.summary.scalar("xCustomScalars/" + name + "/margin_hi", mean + std)

    # Setup layout for custom scalars.
    layout = None
    if enable_custom_scalars:
        # Group series named "cat/chart/series" into category -> chart -> [series].
        cat_dict = OrderedDict()
        for series_name in sorted(_vars.keys()):
            p = series_name.split("/")
            cat = p[0] if len(p) >= 2 else ""
            chart = "/".join(p[1:-1]) if len(p) >= 3 else p[-1]
            if cat not in cat_dict:
                cat_dict[cat] = OrderedDict()
            if chart not in cat_dict[cat]:
                cat_dict[cat][chart] = []
            cat_dict[cat][chart].append(series_name)
        categories = []
        for cat_name, chart_dict in cat_dict.items():
            charts = []
            for chart_name, series_names in chart_dict.items():
                series = []
                for series_name in series_names:
                    series.append(layout_pb2.MarginChartContent.Series(value=series_name, lower="xCustomScalars/" + series_name + "/margin_lo", upper="xCustomScalars/" + series_name + "/margin_hi"))
                margin = layout_pb2.MarginChartContent(series=series)
                charts.append(layout_pb2.Chart(title=chart_name, margin=margin))
            categories.append(layout_pb2.Category(title=cat_name, chart=charts))
        layout = summary_lib.custom_scalar_pb(layout_pb2.Layout(category=categories))
    return layout
def _evaluate(self, Gs, Gs_kwargs, num_gpus):
    """Linear separability metric: generate samples, classify each attribute,
    fit linear SVMs on the (latent, predicted-class) pairs, and accumulate
    conditional entropies per latent space."""
    minibatch_size = num_gpus * self.minibatch_per_gpu

    # Construct TensorFlow graph for each GPU.
    result_expr = []
    for gpu_idx in range(num_gpus):
        with tflex.device('/gpu:%d' % gpu_idx):
            Gs_clone = Gs.clone()

            # Generate images.
            latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
            labels = self._get_random_labels_tf(self.minibatch_per_gpu)
            dlatents = Gs_clone.components.mapping.get_output_for(latents, labels, **Gs_kwargs)
            images = Gs_clone.get_output_for(latents, None, **Gs_kwargs)

            # Downsample to 256x256. The attribute classifiers were built for 256x256.
            if images.shape[2] > 256:
                factor = images.shape[2] // 256
                images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor])
                images = tf.reduce_mean(images, axis=[3, 5])

            # Run classifier for each attribute.
            result_dict = dict(latents=latents, dlatents=dlatents[:, -1])
            for attrib_idx in self.attrib_indices:
                classifier = misc.load_pkl(classifier_urls[attrib_idx])
                logits = classifier.get_output_for(images, None)
                # Two-class softmax over (logit, -logit).
                predictions = tf.nn.softmax(tf.concat([logits, -logits], axis=1))
                result_dict[attrib_idx] = predictions
            result_expr.append(result_dict)

    # Sampling loop.
    results = []
    for begin in range(0, self.num_samples, minibatch_size):
        self._report_progress(begin, self.num_samples)
        results += tflib.run(result_expr)
    results = {key: np.concatenate([value[key] for value in results], axis=0) for key in results[0].keys()}

    # Calculate conditional entropy for each attribute.
    conditional_entropies = defaultdict(list)
    for attrib_idx in self.attrib_indices:
        # Prune the least confident samples: keep the num_keep most confident.
        pruned_indices = list(range(self.num_samples))
        pruned_indices = sorted(pruned_indices, key=lambda i: -np.max(results[attrib_idx][i]))
        pruned_indices = pruned_indices[:self.num_keep]

        # Fit SVM to the remaining samples.
        svm_targets = np.argmax(results[attrib_idx][pruned_indices], axis=1)
        for space in ['latents', 'dlatents']:
            svm_inputs = results[space][pruned_indices]
            try:
                svm = sklearn.svm.LinearSVC()
                svm.fit(svm_inputs, svm_targets)
                svm.score(svm_inputs, svm_targets)
                svm_outputs = svm.predict(svm_inputs)
            # NOTE(review): bare except swallows everything, including
            # KeyboardInterrupt -- consider `except Exception:`.
            except:
                svm_outputs = svm_targets  # assume perfect prediction

            # Calculate conditional entropy from the 2x2 joint distribution of
            # (SVM output, classifier target).
            p = [[np.mean([case == (row, col) for case in zip(svm_outputs, svm_targets)]) for col in (0, 1)] for row in (0, 1)]
            conditional_entropies[space].append(conditional_entropy(p))