def G_logistic_ns(G, D, opt, training_set, minibatch_size,
                  latent_type='uniform', group_recons_lambda=0.):
    """Non-saturating logistic generator loss with an optional group term.

    Args:
        G: Generator network exposing `input_shapes` and `get_output_for`.
        D: Discriminator network exposing `get_output_for`.
        opt: Unused.
        training_set: Dataset object providing `get_random_labels_tf`.
        minibatch_size: Number of latent vectors to sample.
        latent_type: 'uniform' (minval=-2, maxval=2) or 'normal'.
        group_recons_lambda: Weight of the group reconstruction term; the
            term is only built when this value is > 0.

    Returns:
        Tuple `(loss, None)`.

    Raises:
        ValueError: If `latent_type` is not supported.
    """
    del opt  # Unused.
    if latent_type not in ('uniform', 'normal'):
        raise ValueError('Latent type not supported: ' + latent_type)

    latent_shape = [minibatch_size, G.input_shapes[0][1]]
    if latent_type == 'normal':
        z = tf.random.normal(latent_shape)
    else:
        z = tf.random.uniform(latent_shape, minval=-2, maxval=2)

    labels = training_set.get_random_labels_tf(minibatch_size)
    gen_outputs = G.get_output_for(z, labels, is_training=True,
                                   return_atts=False)
    fake_images, _, group_feat = get_return_v(gen_outputs, 3)
    fake_scores = D.get_output_for(fake_images, labels, is_training=True)
    loss = tf.nn.softplus(-fake_scores)  # -log(sigmoid(fake_scores))

    if group_recons_lambda > 0:
        # Second generator pass with ncut_maxval=1; only its group features
        # are compared against those of the first pass.
        intact_outputs = G.get_output_for(z, labels, is_training=True,
                                          return_atts=False, ncut_maxval=1)
        _, _, group_feat_intact = get_return_v(intact_outputs, 3)
        loss += group_recons_lambda * recons_group(group_feat,
                                                   group_feat_intact)
    return loss, None
def factor_vae_G(E, G, D, opt, training_set, minibatch_size, reals, labels,
                 latent_type='normal', hy_gamma=1,
                 recons_type='bernoulli_loss'):
    """FactorVAE loss for the encoder/generator pair.

    The loss is `reconstruction + KL + hy_gamma * tc`, where `tc` is the
    difference between the two columns of the logits that `D` produces for
    the sampled latent codes.

    Args:
        E: Encoder network; its output yields `(means, log_var)`.
        G: Generator/decoder network.
        D: Discriminator on latent codes; its output yields `(logits, probs)`.
        opt: Unused.
        training_set: Unused.
        minibatch_size: Unused here.
        reals: Real images fed to the encoder.
        labels: Labels passed to `E` and `G`.
        latent_type: Unused here.
        hy_gamma: Weight of the total-correlation term.
        recons_type: Forwarded to `make_reconstruction_loss`.

    Returns:
        The loss tensor (after `autosummary` logging).
    """
    del opt, training_set  # Unused.
    encoder_out = E.get_output_for(reals, labels, is_training=True)
    means, log_var = get_return_v(encoder_out, 2)
    kl_loss = autosummary('Loss/kl_loss', compute_gaussian_kl(means, log_var))

    z = sample_from_latent_distribution(means, log_var)
    recons = get_return_v(G.get_output_for(z, labels, is_training=True), 1)
    logits, _ = get_return_v(D.get_output_for(z, is_training=True), 2)
    # tc = E[log(p_real) - log(p_fake)] = E[logit_real - logit_fake];
    # no reduction over the batch is applied here.
    tc_loss = logits[:, 0] - logits[:, 1]

    recons_loss = make_reconstruction_loss(reals, recons,
                                           recons_type=recons_type)
    recons_loss = autosummary('Loss/recons_loss', recons_loss)
    elbo = autosummary('Loss/fac_vae_elbo', recons_loss + kl_loss)
    return autosummary('Loss/fac_vae_loss', elbo + hy_gamma * tc_loss)
def so_vae(E, G, opt, training_set, minibatch_size, reals, labels, hy_1p=0,
           hy_beta=1, latent_type='normal', recons_type='bernoulli_loss'):
    """VAE loss with a squared penalty on the generator's `lie_vars` output.

    The loss is `reconstruction + hy_beta * KL + hy_1p * sum(lie_vars ** 2)`.

    Args:
        E: Encoder network; its output yields `(means, log_var)`.
        G: Generator network whose output yields eight values; only the
            first (reconstructions) and the last (`lie_vars`) are used.
        opt: Unused.
        training_set: Unused.
        minibatch_size: Unused here.
        reals: Real images fed to the encoder.
        labels: Labels passed to `E` and `G`.
        hy_1p: Weight of the squared `lie_vars` penalty.
        hy_beta: Weight of the KL term.
        latent_type: Unused here.
        recons_type: Forwarded to `make_reconstruction_loss`.

    Returns:
        The loss tensor (after `autosummary` logging).
    """
    del opt, training_set  # Unused.
    means, log_var = get_return_v(
        E.get_output_for(reals, labels, is_training=True), 2)
    kl_loss = autosummary('Loss/kl_loss', compute_gaussian_kl(means, log_var))

    z = sample_from_latent_distribution(means, log_var)
    # Per the original notes, slots 2, 5 and 6 of the generator output hold
    # lie_groups_as_fm [b, lat_dim, mat_dim, mat_dim],
    # lie_algs [b, lat_dim, mat_dim, mat_dim] and
    # lie_alg_basis [1, lat_dim, mat_dim, mat_dim]; none of them is used here.
    recons, _, _, _, _, _, _, lie_vars = get_return_v(
        G.get_output_for(z, labels, is_training=True), 8)

    recons_loss = make_reconstruction_loss(reals, recons,
                                           recons_type=recons_type)
    recons_loss = autosummary('Loss/recons_loss', recons_loss)
    elbo = autosummary('Loss/so_vae_elbo', recons_loss + hy_beta * kl_loss)
    penalised = elbo + hy_1p * tf.reduce_sum(lie_vars * lie_vars)
    return autosummary('Loss/so_vae_loss', penalised)
def betatc_vae(E, G, opt, training_set, minibatch_size, reals, labels,
               latent_type='normal', hy_beta=1,
               recons_type='bernoulli_loss'):
    """Beta-TC-VAE loss: ELBO plus a weighted total-correlation term.

    The loss is `reconstruction + KL + (hy_beta - 1) * total_correlation`.

    Args:
        E: Encoder network; its output yields `(means, log_var)`.
        G: Generator/decoder network.
        opt: Unused.
        training_set: Unused.
        minibatch_size: Unused here.
        reals: Real images fed to the encoder.
        labels: Labels passed to `E` and `G`.
        latent_type: Unused here.
        hy_beta: The total-correlation term is scaled by `hy_beta - 1`.
        recons_type: Forwarded to `make_reconstruction_loss`.

    Returns:
        The loss tensor (after `autosummary` logging).
    """
    del opt, training_set  # Unused.
    means, log_var = get_return_v(
        E.get_output_for(reals, labels, is_training=True), 2)
    kl_loss = autosummary('Loss/kl_loss', compute_gaussian_kl(means, log_var))

    z = sample_from_latent_distribution(means, log_var)
    recons = get_return_v(G.get_output_for(z, labels, is_training=True), 1)
    recons_loss = make_reconstruction_loss(reals, recons,
                                           recons_type=recons_type)
    recons_loss = autosummary('Loss/recons_loss', recons_loss)

    tc_term = (hy_beta - 1.) * total_correlation(z, means, log_var)
    elbo = autosummary('Loss/betatc_vae_elbo', recons_loss + kl_loss)
    return autosummary('Loss/betatc_vae_loss', elbo + tc_term)
def G_logistic_ns_regW(G, D, opt, training_set, minibatch_size, DM=None,
                       latent_type='uniform', regW_lambda=1):
    """Non-saturating logistic generator loss plus a `z_w` regulariser.

    Args:
        G: Generator network; the third value of its output (`z_w`) is fed
            to `calc_z_w_reg`.
        D: Discriminator network.
        opt: Unused.
        training_set: Dataset object providing `get_random_labels_tf`.
        minibatch_size: Number of latent vectors to sample.
        DM: Unused here.
        latent_type: 'uniform' (minval=-2, maxval=2), 'normal' or
            'trunc_normal'.
        regW_lambda: Weight of the `z_w` regularisation term.

    Returns:
        Tuple `(loss, None)`.

    Raises:
        ValueError: If `latent_type` is not supported.
    """
    del opt  # Unused.
    if latent_type not in ('uniform', 'normal', 'trunc_normal'):
        raise ValueError('Latent type not supported: ' + latent_type)

    latent_shape = [minibatch_size, G.input_shapes[0][1]]
    if latent_type == 'trunc_normal':
        z = tf.random.truncated_normal(latent_shape)
    elif latent_type == 'normal':
        z = tf.random.normal(latent_shape)
    else:
        z = tf.random.uniform(latent_shape, minval=-2, maxval=2)

    labels = training_set.get_random_labels_tf(minibatch_size)
    gen_outputs = G.get_output_for(z, labels, is_training=True,
                                   return_atts=False)
    fake_images, _, z_w = get_return_v(gen_outputs, 3)
    fake_scores = get_return_v(
        D.get_output_for(fake_images, labels, is_training=True), 1)

    z_w_penalty = calc_z_w_reg(z_w)
    loss = tf.nn.softplus(-fake_scores)  # -log(sigmoid(fake_scores))
    loss += regW_lambda * z_w_penalty
    return loss, None
def generate_batch_factor_code(self, ground_truth_data,
                               representation_function, num_points,
                               random_state, batch_size):
    """Encode `num_points` sampled observations and return codes and factors.

    Observations are drawn from `ground_truth_data` in chunks of at most
    `batch_size`, passed through `misc.adjust_dynamic_range` with the ranges
    `[-1., 1.]` and `self.drange_net`, and encoded with
    `representation_function.run(..., is_validation=True)`.

    Args:
        ground_truth_data: GroundTruthData to be sampled from.
        representation_function: Network object whose `run` method maps
            observations to representations.
        num_points: Number of points to sample.
        random_state: Numpy random state used for randomness.
        batch_size: Maximum number of points sampled per chunk.

    Returns:
        representations: Codes (num_codes, num_points)-np array.
        factors: Factors generating the codes (num_factors, num_points)-np
            array.

    Raises:
        ValueError: If points are requested with a non-positive
            `batch_size` (the loop could otherwise never terminate).
    """
    if num_points > 0 and batch_size <= 0:
        raise ValueError(
            'batch_size must be positive, got %r' % (batch_size,))

    def _encode(observations):
        # When the network has a label input, feed it a zero-width label
        # array with one row per observation.
        if self.has_label_place:
            empty_labels = np.zeros([observations.shape[0], 0])
            outputs = representation_function.run(
                observations, empty_labels, is_validation=True)
        else:
            outputs = representation_function.run(
                observations, is_validation=True)
        return get_return_v(outputs, 1)

    def _stack(chunks):
        # Keep the historical results: `None` when nothing was sampled, the
        # chunk itself when there is only one, otherwise a single vstack
        # (instead of re-stacking the growing array on every iteration).
        if not chunks:
            return None
        if len(chunks) == 1:
            return chunks[0]
        return np.vstack(chunks)

    factor_chunks = []
    code_chunks = []
    n_done = 0
    while n_done < num_points:
        chunk_size = min(num_points - n_done, batch_size)
        chunk_factors, chunk_observations = ground_truth_data.sample(
            chunk_size, random_state)
        chunk_observations = misc.adjust_dynamic_range(
            chunk_observations, [-1., 1.], self.drange_net)
        factor_chunks.append(chunk_factors)
        code_chunks.append(_encode(chunk_observations))
        n_done += chunk_size

    return (np.transpose(_stack(code_chunks)),
            np.transpose(_stack(factor_chunks)))
def D_logistic_r1_vc2(G, D, opt, training_set, minibatch_size, reals, labels,
                      gamma=10.0, latent_type='uniform', D_global_size=0):
    """Logistic discriminator loss with an R1 gradient penalty on the reals.

    When `D_global_size > 0`, the first `D_global_size` latent dimensions
    are a random one-hot code and the remaining dimensions are sampled
    according to `latent_type`.

    Args:
        G: Generator network exposing `input_shapes` and `get_output_for`.
        D: Discriminator network.
        opt: Unused.
        training_set: Unused.
        minibatch_size: Number of latent vectors to sample.
        reals: Real images; the gradient penalty sums squared gradients
            over axes [1, 2, 3] of this tensor.
        labels: Labels passed to `G` and `D`.
        gamma: The penalty is scaled by `gamma * 0.5`.
        latent_type: 'uniform' (minval=-2, maxval=2), 'normal' or
            'trunc_normal'.
        D_global_size: Number of discrete (one-hot) latent dimensions.

    Returns:
        Tuple `(loss, reg)` with the logistic loss and the scaled penalty.

    Raises:
        ValueError: If `latent_type` is not supported.
    """
    _ = opt, training_set
    discrete_latents = None
    if D_global_size > 0:
        discrete_idx = tf.random.uniform([minibatch_size], minval=0,
                                         maxval=D_global_size,
                                         dtype=tf.int32)
        discrete_latents = tf.one_hot(discrete_idx, D_global_size)

    # Shape of the continuous part of the latent code.
    continuous_shape = [minibatch_size,
                        G.input_shapes[0][1] - D_global_size]
    if latent_type == 'uniform':
        latents = tf.random.uniform(continuous_shape, minval=-2, maxval=2)
    elif latent_type == 'normal':
        # tf.random.normal replaces the deprecated tf.random_normal alias so
        # that all branches use the tf.random namespace.
        latents = tf.random.normal(continuous_shape)
    elif latent_type == 'trunc_normal':
        latents = tf.random.truncated_normal(continuous_shape)
    else:
        raise ValueError('Latent type not supported: ' + latent_type)
    if D_global_size > 0:
        latents = tf.concat([discrete_latents, latents], axis=1)

    fake_images_out = get_return_v(
        G.get_output_for(latents, labels, is_training=True,
                         return_atts=False), 1)
    real_scores_out = get_return_v(
        D.get_output_for(reals, labels, is_training=True), 1)
    fake_scores_out = get_return_v(
        D.get_output_for(fake_images_out, labels, is_training=True), 1)
    real_scores_out = autosummary('Loss/scores/real', real_scores_out)
    fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out)

    loss = tf.nn.softplus(fake_scores_out)  # -log(1-sigmoid(fake_scores_out))
    # -log(sigmoid(real_scores_out))
    loss += tf.nn.softplus(-real_scores_out)  # pylint: disable=invalid-unary-operand-type

    with tf.name_scope('GradientPenalty'):
        real_grads = tf.gradients(tf.reduce_sum(real_scores_out), [reals])[0]
        gradient_penalty = tf.reduce_sum(tf.square(real_grads),
                                         axis=[1, 2, 3])
        gradient_penalty = autosummary('Loss/gradient_penalty',
                                       gradient_penalty)
        reg = gradient_penalty * (gamma * 0.5)
    return loss, reg
def plot_rot_fn(network, seeds, latent_pair, n_samples_per, bound, rot_start,
                rot_end, rot_interval, coord_adj, load_gan=False):
    """Plot the mean grid distance as a function of the rotation degree.

    For every rotation in `range(rot_start, rot_end + 1, rot_interval)` a
    latent grid is sampled per seed, rendered with the generator and scored
    with `measure_distance`; the per-rotation mean over seeds is handed to
    `plot_fn`.

    Args:
        network: Path/URL of the network pickle to load.
        seeds: Iterable of random seeds, one grid per seed and rotation.
        latent_pair: Forwarded to `sample_grid_z`.
        n_samples_per: Grid side length; forwarded to `sample_grid_z` and
            `measure_distance`.
        bound: Forwarded to `sample_grid_z`.
        rot_start: First rotation degree (converted with `int`).
        rot_end: Last rotation degree, inclusive (converted with `int`).
        rot_interval: Step between rotation degrees (converted with `int`).
        coord_adj: Forwarded to `plot_fn`.
        load_gan: If True the pickle is unpacked as four networks and the
            last one is used as generator; otherwise as an (E, G) pair.
    """
    tflib.init_tf()
    print('Loading networks from "%s"...' % network)
    if load_gan:
        _, _, _, G = misc.load_pkl(network)
    else:
        _, G = get_return_v(misc.load_pkl(network), 2)

    G_kwargs = dnnlib.EasyDict(is_validation=True, randomize_noise=True,
                               minibatch_size=8)

    distance_measure = misc.load_pkl(
        'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl'
    )

    # Rotations whose positions are passed to plot_fn as mark indices.
    marked_rotations = (-180, -90, 0, 90, 180)
    rot_ls = list(range(int(rot_start), int(rot_end) + 1, int(rot_interval)))
    n_rots = len(rot_ls)
    mark_idxs = []
    distance_rot_ls = []
    for rot_idx, rot in enumerate(rot_ls):
        print('Generating images for rotation degree %d (%d/%d) ...' %
              (rot, rot_idx, n_rots))
        if rot in marked_rotations:
            mark_idxs.append(rot_idx)

        per_seed_distances = []
        for seed in seeds:
            rnd = np.random.RandomState(seed)
            z = sample_grid_z(rnd, G, latent_pair, n_samples_per, bound, rot)
            # [n_samples_per*n_samples_per, channel, height, width]
            images = get_return_v(G.run(z, None, **G_kwargs), 1)
            per_seed_distances.append(
                measure_distance(images, n_samples_per, distance_measure))
        distance_rot_ls.append(np.mean(np.array(per_seed_distances)))

    plot_fn(rot_ls, distance_rot_ls, rot_start, rot_end, mark_idxs,
            coord_adj=coord_adj)
def G_logistic_ns_info_gan(G, D, I, opt, training_set, minibatch_size,
                           latent_type='uniform', C_lambda=1, norm_ord=2,
                           group_recons_lambda=0., **kwargs):
    """Non-saturating generator loss with an InfoGAN-style regression term.

    Args:
        G: Generator network exposing `input_shapes` and `get_output_for`.
        D: Discriminator network.
        I: Regression network applied to the generated images.
        opt: Unused.
        training_set: Dataset object providing `get_random_labels_tf`.
        minibatch_size: Number of latent vectors to sample.
        latent_type: 'uniform' (minval=-2, maxval=2) or 'normal'.
        C_lambda: Forwarded to `calc_regress_loss`.
        norm_ord: Forwarded to `calc_regress_loss`.
        group_recons_lambda: Weight of the group reconstruction term; the
            term is only built when this value is > 0.
        **kwargs: Accepted and ignored.

    Returns:
        Tuple `(G_loss, None)`.

    Raises:
        ValueError: If `latent_type` is not supported.
    """
    del opt  # Unused.
    C_global_size = G.input_shapes[0][1]
    if latent_type not in ('uniform', 'normal'):
        raise ValueError('Latent type not supported: ' + latent_type)

    latent_shape = [minibatch_size, G.input_shapes[0][1]]
    if latent_type == 'normal':
        z = tf.random.normal(latent_shape)
    else:
        z = tf.random.uniform(latent_shape, minval=-2, maxval=2)

    labels = training_set.get_random_labels_tf(minibatch_size)
    gen_outputs = G.get_output_for(z, labels, is_training=True,
                                   return_atts=False)
    fake_out, _, group_feat = get_return_v(gen_outputs, 3)
    fake_scores = D.get_output_for(fake_out, labels, is_training=True)
    G_loss = tf.nn.softplus(-fake_scores)  # -log(sigmoid(fake_scores))

    regress_out = I.get_output_for(fake_out, is_training=True)
    I_loss = calc_regress_loss(z, regress_out, C_global_size, C_lambda,
                               minibatch_size, norm_ord=norm_ord)
    G_loss += autosummary('Loss/I_loss', I_loss)

    if group_recons_lambda > 0:
        # Second generator pass with ncut_maxval=1; only its group features
        # are compared against those of the first pass.
        intact_outputs = G.get_output_for(z, labels, is_training=True,
                                          return_atts=False, ncut_maxval=1)
        _, _, group_feat_intact = get_return_v(intact_outputs, 3)
        G_loss += group_recons_lambda * recons_group(group_feat,
                                                     group_feat_intact)
    return G_loss, None
def lie_vae_with_split(E, G, opt, training_set, minibatch_size, reals, labels,
                       latent_type='normal', hy_dcp=1, hy_hes=0, hy_lin=0,
                       hy_ncut=1, hy_rec=1, recons_type='bernoulli_loss'):
    """VAE loss with a Lie-group term computed on split latent codes.

    The sampled latents and their splits (from `split_latents`) are
    concatenated along the batch axis and decoded in one generator pass;
    only the first `minibatch_size` reconstructions enter the
    reconstruction loss.

    Args:
        E: Encoder network; its output yields
            `(means, log_var, group_feats_E)`.
        G: Generator network whose output yields six values.
        opt: Unused.
        training_set: Unused.
        minibatch_size: Batch size; used to reshape `group_feats_E` and to
            slice the reconstructions.
        reals: Real images fed to the encoder.
        labels: Labels passed to `E` and `G`.
        latent_type: Unused here.
        hy_dcp, hy_hes, hy_lin, hy_rec: Forwarded to
            `make_lie_group_loss_with_split`.
        hy_ncut: Forwarded to `split_latents` and
            `make_lie_group_loss_with_split`.
        recons_type: Forwarded to `make_reconstruction_loss`.

    Returns:
        The loss tensor (after `autosummary` logging).
    """
    del opt, training_set  # Unused.
    means, log_var, group_feats_E = get_return_v(
        E.get_output_for(reals, labels, is_training=True), 3)
    kl_loss = autosummary('Loss/kl_loss', compute_gaussian_kl(means, log_var))

    # The encoder emits a flattened square matrix per sample.
    flat_dim = group_feats_E.get_shape().as_list()[1]
    mat_dim = int(math.sqrt(flat_dim))
    assert mat_dim * mat_dim == group_feats_E.get_shape().as_list()[1]
    group_feats_E = tf.reshape(group_feats_E,
                               [minibatch_size, mat_dim, mat_dim])

    z = sample_from_latent_distribution(means, log_var)
    z_split_ls = split_latents(z, minibatch_size, hy_ncut=hy_ncut)
    z_split = tf.concat(z_split_ls, axis=0)
    labels_split = tf.concat([labels] * len(z_split_ls), axis=0)
    z_all = tf.concat([z, z_split], axis=0)
    labels_all = tf.concat([labels, labels_split], axis=0)

    recons, group_feats_G, _, _, lie_alg_feats, lie_alg_basis = get_return_v(
        G.get_output_for(z_all, labels_all, is_training=True), 6)

    lie_group_loss = make_lie_group_loss_with_split(
        group_feats_E, group_feats_G, lie_alg_feats, lie_alg_basis,
        minibatch_size, hy_rec, hy_dcp, hy_hes, hy_lin, hy_ncut)
    lie_group_loss = autosummary('Loss/lie_group_loss', lie_group_loss)

    recons_loss = make_reconstruction_loss(reals, recons[:minibatch_size],
                                           recons_type=recons_type)
    recons_loss = autosummary('Loss/recons_loss', recons_loss)

    elbo = autosummary('Loss/lie_vae_elbo', recons_loss + kl_loss)
    return autosummary('Loss/lie_vae_loss', elbo + lie_group_loss)
def group_subspace_vae(E, G, opt, training_set, minibatch_size, reals, labels,
                       subgroup_sizes_ls, subspace_sizes_ls,
                       latent_type='normal', hy_beta=1, hy_hes=0, hy_rec=1,
                       hy_commute=0, forward_eg=False,
                       recons_type='bernoulli_loss'):
    """VAE loss with a group-subspace term.

    The loss is `reconstruction + hy_beta * KL + group_subspace_loss`.

    Args:
        E: Encoder network; its output yields
            `(means, log_var, group_feats_E)`.
        G: Generator network whose output yields eight values.
        opt: Unused.
        training_set: Unused.
        minibatch_size: Forwarded to `make_group_subspace_loss`.
        reals: Real images fed to the encoder.
        labels: Labels passed to `E` and `G`.
        subgroup_sizes_ls: Forwarded to `make_group_subspace_loss`.
        subspace_sizes_ls: Forwarded to `make_group_subspace_loss`.
        latent_type: Unused here.
        hy_beta: Weight of the KL term.
        hy_hes, hy_rec, hy_commute: Forwarded to `make_group_subspace_loss`.
        forward_eg: If True, the encoder group features are concatenated to
            the sampled latents (axis 1) before being fed to `G`.
        recons_type: Forwarded to `make_reconstruction_loss`.

    Returns:
        The loss tensor (after `autosummary` logging).
    """
    del opt, training_set  # Unused.
    means, log_var, group_feats_E = get_return_v(
        E.get_output_for(reals, labels, is_training=True), 3)
    kl_loss = autosummary('Loss/kl_loss', compute_gaussian_kl(means, log_var))

    z = sample_from_latent_distribution(means, log_var)
    if forward_eg:
        gen_input = tf.concat([z, group_feats_E], axis=1)
    else:
        gen_input = z
    (recons, group_feats_G, _, _, _,
     lie_alg_basis_flattened, _, _) = get_return_v(
         G.get_output_for(gen_input, labels, is_training=True), 8)

    lie_group_loss = make_group_subspace_loss(
        minibatch_size=minibatch_size,
        group_feats_E=group_feats_E,
        group_feats_G=group_feats_G,
        subgroup_sizes_ls=subgroup_sizes_ls,
        subspace_sizes_ls=subspace_sizes_ls,
        lie_alg_basis_flattened=lie_alg_basis_flattened,
        hy_hes=hy_hes,
        hy_rec=hy_rec,
        hy_commute=hy_commute)

    recons_loss = make_reconstruction_loss(reals, recons,
                                           recons_type=recons_type)
    recons_loss = autosummary('Loss/recons_loss', recons_loss)

    elbo = autosummary('Loss/elbo', recons_loss + hy_beta * kl_loss)
    return autosummary('Loss/loss', elbo + lie_group_loss)
def dip_vae(E, G, opt, training_set, minibatch_size, reals, labels,
            latent_type='normal', dip_type='dip_vae_i', lambda_d_factor=10.,
            lambda_od=1., recons_type='bernoulli_loss'):
    """DIP-VAE loss: ELBO plus a covariance regulariser.

    Args:
        E: Encoder network; its output yields `(means, log_var)`.
        G: Generator/decoder network.
        opt: Unused.
        training_set: Unused.
        minibatch_size: Unused here.
        reals: Real images fed to the encoder.
        labels: Labels passed to `E` and `G`.
        latent_type: Unused here.
        dip_type: 'dip_vae_i' regularises the covariance of the means;
            'dip_vae_ii' adds the batch mean of `diag(exp(log_var))` to it
            first.
        lambda_d_factor: The diagonal weight is `lambda_d_factor * lambda_od`.
        lambda_od: Off-diagonal weight.
        recons_type: Forwarded to `make_reconstruction_loss`.

    Returns:
        The loss tensor (after `autosummary` logging).

    Raises:
        NotImplementedError: If `dip_type` is not supported.
    """
    del opt, training_set  # Unused.
    means, log_var = get_return_v(
        E.get_output_for(reals, labels, is_training=True), 2)
    kl_loss = autosummary('Loss/kl_loss', compute_gaussian_kl(means, log_var))

    z = sample_from_latent_distribution(means, log_var)
    recons = get_return_v(G.get_output_for(z, labels, is_training=True), 1)
    recons_loss = make_reconstruction_loss(reals, recons,
                                           recons_type=recons_type)
    recons_loss = autosummary('Loss/recons_loss', recons_loss)

    # Regularisation target: the covariance matrix to be pushed towards
    # a diagonal form.
    cov_z_mean = compute_covariance_z_mean(means)
    lambda_d = lambda_d_factor * lambda_od
    if dip_type == 'dip_vae_i':
        # mu = means is [batch_size, num_latent];
        # cov_p(x)[mu(x)] = E[mu * mu^T] - E[mu] E[mu]^T.
        cov_target = cov_z_mean
    elif dip_type == 'dip_vae_ii':
        expected_cov_enc = tf.reduce_mean(
            tf.matrix_diag(tf.exp(log_var)), axis=0)
        cov_target = expected_cov_enc + cov_z_mean
    else:
        raise NotImplementedError("DIP variant not supported.")
    cov_dip_regularizer = regularize_diag_off_diag_dip(
        cov_target, lambda_od, lambda_d)

    elbo = autosummary('Loss/dip_vae_elbo', recons_loss + kl_loss)
    return autosummary('Loss/dip_vae_loss', elbo + cov_dip_regularizer)
def layer(x, layer_idx, up):
    """Apply one latent-modulated synthesis layer followed by a 3x3 conv.

    Relies on names from the enclosing scope (`latent_split_ls_for_std_gen`,
    `latents_ready_ls`, `return_atts`, `resolution`, `n_subs`, `kwargs`,
    `nf`, `res`, `use_wscale`, `randomize_noise`, `noise_inputs`, `PN`,
    `act`).

    Args:
        x: Input feature tensor.
        layer_idx: Index selecting the latent split, the ready latents and
            the noise input for this layer.
        up: If truthy, use `upscale2d_conv2d` instead of `conv2d`.

    Returns:
        Tuple `(x, atts)` with the layer output and the attention maps
        returned by `build_C_spgroup_layers_with_latents_ready`.
    """
    modulated = build_C_spgroup_layers_with_latents_ready(
        x,
        'SP_latents',
        latent_split_ls_for_std_gen[layer_idx],
        layer_idx,
        latents_ready_ls[layer_idx],
        return_atts=return_atts,
        resolution=resolution,
        n_subs=n_subs,
        **kwargs)
    x, atts = get_return_v(modulated, 2)

    conv_fn = upscale2d_conv2d if up else conv2d
    x = conv_fn(x, fmaps=nf(res - 1), kernel=3, use_wscale=use_wscale)

    if not randomize_noise:
        noise = tf.cast(noise_inputs[layer_idx], x.dtype)
    else:
        # One noise value per spatial position, shared across axis 1.
        noise_shape = [tf.shape(x)[0], 1, x.shape[2], x.shape[3]]
        noise = tf.random_normal(noise_shape, dtype=x.dtype)
    # The noise strength is a scalar variable initialised to zero.
    noise_strength = tf.get_variable('noise_strength',
                                     shape=[],
                                     initializer=tf.initializers.zeros())
    x += noise * tf.cast(noise_strength, x.dtype)
    return PN(act(apply_bias(x))), atts
def generate_grids(network, seeds, latent_pair, n_samples_per=10, bound=2,
                   rot=0, load_gan=False):
    """Render one latent-traversal grid image per seed and report distances.

    For every seed a latent grid is sampled with `sample_grid_z`, rendered
    with the generator, scored with `measure_distance`, outlined, tiled
    into a single image and saved as `seedNNNN.png` in the run directory.
    The mean of the per-seed distances is printed at the end.

    Args:
        network: Path/URL of the network pickle to load.
        seeds: Sequence of random seeds (its length is printed).
        latent_pair: Forwarded to `sample_grid_z`.
        n_samples_per: Grid side length.
        bound: Forwarded to `sample_grid_z`.
        rot: Forwarded to `sample_grid_z`.
        load_gan: If True the pickle is unpacked as four networks and the
            last one is used as generator; otherwise as an (E, G) pair.
    """
    tflib.init_tf()
    print('Loading networks from "%s"...' % network)
    if load_gan:
        _, _, _, G = misc.load_pkl(network)
    else:
        _, G = get_return_v(misc.load_pkl(network), 2)

    G_kwargs = dnnlib.EasyDict(is_validation=True, randomize_noise=True,
                               minibatch_size=8)

    distance_measure = misc.load_pkl(
        'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl'
    )

    n_seeds = len(seeds)
    distance_ls = []
    for seed_idx, seed in enumerate(seeds):
        print('Generating images for seed %d (%d/%d) ...' %
              (seed, seed_idx, n_seeds))
        rnd = np.random.RandomState(seed)
        z = sample_grid_z(rnd, G, latent_pair, n_samples_per, bound, rot)
        # [n_samples_per*n_samples_per, channel, height, width]
        images = get_return_v(G.run(z, None, **G_kwargs), 1)

        distance_ls.append(
            measure_distance(images, n_samples_per, distance_measure))

        outlined = add_outline(images, width=1)
        n_samples_square, c, h, w = np.shape(outlined)
        assert n_samples_square == n_samples_per * n_samples_per
        # Tile the flat batch into a (rows*h, cols*w, c) image.
        grid = np.reshape(outlined, (n_samples_per, n_samples_per, c, h, w))
        grid = np.transpose(grid, [0, 3, 1, 4, 2])
        grid = np.reshape(grid, (n_samples_per * h, n_samples_per * w, c))
        # NOTE(review): this rescale treats the outlined images as lying in
        # [0, 1], whereas measure_distance treats generator output as
        # [-1, 1] -- confirm what add_outline returns.
        grid = misc.adjust_dynamic_range(grid, [0, 1], [0, 255])
        grid = np.rint(grid).clip(0, 255).astype(np.uint8)
        out_path = dnnlib.make_run_dir_path('seed%04d.png' % seed)
        PIL.Image.fromarray(grid, 'RGB').save(out_path)

    print('mean_distance:', np.mean(np.array(distance_ls)))
def coma_vae(E, G, opt, training_set, minibatch_size, reals, labels,
             latent_type='normal', hy_gamma=1, epsilon=1,
             recons_type='bernoulli_loss'):
    """VAE loss with an additional `get_coma_loss` term on perturbed latents.

    The sampled latents and a perturbed copy (`get_delta_sampled`) are
    decoded together; the decoded images are re-encoded and the two halves
    of the re-encoded samples are compared by `get_coma_loss`.

    Args:
        E: Encoder network; its output yields `(means, log_var)`.
        G: Generator/decoder network.
        opt: Unused.
        training_set: Unused.
        minibatch_size: Unused here.
        reals: Real images fed to the encoder.
        labels: Labels passed to `E` and `G`.
        latent_type: Unused here.
        hy_gamma: Weight of the coma term.
        epsilon: Forwarded to `get_delta_sampled`.
        recons_type: Forwarded to `make_reconstruction_loss`.

    Returns:
        The loss tensor (after `autosummary` logging).
    """
    del opt, training_set  # Unused.
    means, log_var = get_return_v(
        E.get_output_for(reals, labels, is_training=True), 2)
    kl_loss = autosummary('Loss/kl_loss', compute_gaussian_kl(means, log_var))

    z = sample_from_latent_distribution(means, log_var)
    delta_mask = get_delta_mask(z)
    z_delta = get_delta_sampled(z, delta_mask, epsilon)

    # Decode original and perturbed codes in a single generator pass.
    z_all = tf.concat([z, z_delta], axis=0)
    labels_all = tf.concat([labels, labels], axis=0)
    fakes_all = get_return_v(
        G.get_output_for(z_all, labels_all, is_training=True), 1)
    recons, _ = tf.split(fakes_all, 2, axis=0)

    recons_loss = make_reconstruction_loss(reals, recons,
                                           recons_type=recons_type)
    recons_loss = autosummary('Loss/recons_loss', recons_loss)

    # Re-encode everything that was generated and split it back into the
    # original and perturbed halves.
    means_all, log_var_all = get_return_v(
        E.get_output_for(fakes_all, labels_all, is_training=True), 2)
    z_reencoded = sample_from_latent_distribution(means_all, log_var_all)
    reg_1, reg_2 = tf.split(z_reencoded, 2, axis=0)
    coma_loss = get_coma_loss(z, z_delta, reg_1, reg_2, delta_mask)

    elbo = autosummary('Loss/coma_vae_elbo', recons_loss + kl_loss)
    return autosummary('Loss/coma_vae_loss', elbo + hy_gamma * coma_loss)
def build_trans_mask_to_feat_encoder_layer(x_mask,
                                           dlatents_in,
                                           name,
                                           n_layers,
                                           scope_idx,
                                           is_training,
                                           wh,
                                           feat_cnn_dim,
                                           construct_feat_by_concat=False,
                                           trans_dim=512,
                                           dff=512,
                                           trans_rate=0.1,
                                           **kwargs):
    '''
    Build the mask_to_feat transformer encoder that turns masks into a
    feature map.

    x_mask: [b, n_masks, wh * wh]
    dlatents_in: [b, n_masks]
    construct_feat_by_concat: selects construct_feat_by_concat_masks_latent
        instead of construct_feat_by_masks_latent for the final step.
    Requires trans_dim to be divisible by wh * wh.
    Returns the constructed feature map.
    '''
    scope_name = name + '-' + str(scope_idx)
    with tf.variable_scope(scope_name):
        b = tf.shape(x_mask)[0]
        n_masks = x_mask.get_shape().as_list()[-2]

        with tf.variable_scope('FeatEncoding'):
            embedded = apply_bias(dense_layer_last_dim(x_mask, trans_dim))
            encoded = trans_encoder_basic(embedded,
                                          is_training,
                                          None,
                                          n_layers,
                                          trans_dim,
                                          num_heads=8,
                                          dff=dff,
                                          rate=trans_rate)
            # [b, n_masks, d_model]
            feat_logits = get_return_v(encoded, 1)

        with tf.variable_scope('ConstructFeatMap'):
            assert trans_dim % (wh * wh) == 0
            feat_precnn_dim = trans_dim // (wh * wh)
            feat_logits = tf.reshape(feat_logits,
                                     [-1, feat_precnn_dim, wh, wh])
            # [b*n_masks, feat_cnn_dim, wh, wh]
            feat_on_masks = conv2d_layer(feat_logits,
                                         fmaps=feat_cnn_dim,
                                         kernel=3)
            feat_on_masks = tf.reshape(feat_on_masks,
                                       [-1, n_masks, feat_cnn_dim, wh, wh])
            if construct_feat_by_concat:
                construct_fn = construct_feat_by_concat_masks_latent
            else:
                construct_fn = construct_feat_by_masks_latent
            masks_2d = tf.reshape(x_mask, [b, n_masks, wh, wh])
            # [b, feat_cnn_dim, h, w] in the non-concat case, per the
            # original note.
            construct_feat = construct_fn(feat_on_masks, masks_2d,
                                          dlatents_in)
        return construct_feat
def _compute_variances(self, representation_model, batch_size, random_state,
                       eval_batch_size=50):
    """Compute the per-dimension variance of representations.

    Observations are sampled from `self.ground_truth_data` in chunks of at
    most 100, passed through `misc.adjust_dynamic_range` with the ranges
    `[-1., 1.]` and `self.drange_net`, and encoded with
    `representation_model.run(..., is_validation=True)`.

    Args:
        representation_model: Network object whose `run` method maps
            observations to representations.
        batch_size: Total number of observations to encode. Any positive
            value is accepted; a trailing partial chunk is sampled when it
            is not a multiple of 100.
        random_state: Numpy random state used for randomness.
        eval_batch_size: Unused; kept for interface compatibility.

    Returns:
        Array with the sample variance (ddof=1) of every representation
        dimension.

    Raises:
        ValueError: If `batch_size` is not positive.
    """
    if batch_size <= 0:
        raise ValueError(
            'batch_size must be positive, got %r' % (batch_size,))

    chunk_size = 100  # Sampling granularity used by the original code.
    representations_ls = []
    n_remaining = batch_size
    while n_remaining > 0:
        n_obs = min(chunk_size, n_remaining)
        observations = self.ground_truth_data.sample_observations(
            n_obs, random_state)
        observations = misc.adjust_dynamic_range(observations, [-1., 1.],
                                                 self.drange_net)
        if self.has_label_place:
            # Feed a zero-width label array with one row per observation.
            empty_labels = np.zeros([observations.shape[0], 0])
            outputs = representation_model.run(observations, empty_labels,
                                               is_validation=True)
        else:
            outputs = representation_model.run(observations,
                                               is_validation=True)
        representations_ls.append(get_return_v(outputs, 1))
        n_remaining -= n_obs

    representations = np.concatenate(tuple(representations_ls), axis=0)
    assert representations.shape[0] == batch_size
    return np.var(representations, axis=0, ddof=1)
def measure_distance(images, n_samples_per, distance_measure):
    """Sum the distances between horizontally adjacent images of a grid.

    The batch is treated as `n_samples_per` consecutive rows of
    `n_samples_per` images; within each row every image is compared with
    its successor.

    Args:
        images: Array whose first dimension is `n_samples_per ** 2`.
        n_samples_per: Grid side length.
        distance_measure: Network object whose `run(a, b)` returns the
            distances between paired batches.

    Returns:
        The sum of all pairwise distances (0 when there are no pairs).
    """
    assert images.shape[0] == n_samples_per * n_samples_per
    scaled = (images + 1) * (255 / 2)  # [-1, 1] -> [0, 255]

    total = 0
    for row in range(n_samples_per):
        row_start = row * n_samples_per
        row_stop = (row + 1) * n_samples_per
        # Pair each image of the row with its right-hand neighbour.
        left = scaled[row_start:row_stop - 1]
        right = scaled[row_start + 1:row_stop]
        distances = get_return_v(distance_measure.run(left, right), 1)
        total += distances.sum()
    return total
def factor_vae_sindis_D(E, D, opt, training_set, minibatch_size, reals,
                        labels, latent_type='normal'):
    """Logistic discriminator loss for the single-output FactorVAE variant.

    Shuffled latent codes (`shuffle_codes`) are scored as "real" and the
    sampled codes as "fake".

    Args:
        E: Encoder network; its output yields `(means, log_var)`.
        D: Discriminator on latent codes; the first of its two outputs is
            used as score.
        opt: Unused.
        training_set: Unused.
        minibatch_size: Unused here.
        reals: Real images fed to the encoder.
        labels: Labels passed to `E`.
        latent_type: Unused here.

    Returns:
        The loss tensor (after `autosummary` logging).
    """
    del opt, training_set  # Unused.
    means, log_var = get_return_v(
        E.get_output_for(reals, labels, is_training=True), 2)
    z = sample_from_latent_distribution(means, log_var)
    z_shuffled = shuffle_codes(z)

    real_scores, _ = get_return_v(
        D.get_output_for(z_shuffled, is_training=True), 2)
    fake_scores, _ = get_return_v(D.get_output_for(z, is_training=True), 2)
    real_scores = autosummary('Loss/scores/real', real_scores)
    fake_scores = autosummary('Loss/scores/fake', fake_scores)

    loss = tf.nn.softplus(fake_scores)  # -log(1-sigmoid(fake_scores))
    # -log(sigmoid(real_scores))
    loss += tf.nn.softplus(-real_scores)  # pylint: disable=invalid-unary-operand-type
    return autosummary('Loss/fac_vae_discr_loss', loss)
def factor_vae_sindis_G(E, G, D, opt, training_set, minibatch_size, reals,
                        labels, latent_type='normal', hy_gamma=1,
                        recons_type='bernoulli_loss'):
    """Encoder/generator loss for the single-output FactorVAE variant.

    The loss is `reconstruction + KL + hy_gamma * softplus(-score)`, where
    `score` is the discriminator's first output for the sampled codes.

    Args:
        E: Encoder network; its output yields `(means, log_var)`.
        G: Generator/decoder network.
        D: Discriminator on latent codes.
        opt: Unused.
        training_set: Unused.
        minibatch_size: Unused here.
        reals: Real images fed to the encoder.
        labels: Labels passed to `E` and `G`.
        latent_type: Unused here.
        hy_gamma: Weight of the discriminator-based term.
        recons_type: Forwarded to `make_reconstruction_loss`.

    Returns:
        The loss tensor (after `autosummary` logging).
    """
    del opt, training_set  # Unused.
    means, log_var = get_return_v(
        E.get_output_for(reals, labels, is_training=True), 2)
    kl_loss = autosummary('Loss/kl_loss', compute_gaussian_kl(means, log_var))

    z = sample_from_latent_distribution(means, log_var)
    recons = get_return_v(G.get_output_for(z, labels, is_training=True), 1)
    fake_scores, _ = get_return_v(D.get_output_for(z, is_training=True), 2)
    tc_loss = tf.nn.softplus(-fake_scores)  # -log(sigmoid(fake_scores))

    recons_loss = make_reconstruction_loss(reals, recons,
                                           recons_type=recons_type)
    recons_loss = autosummary('Loss/recons_loss', recons_loss)
    elbo = autosummary('Loss/fac_vae_elbo', recons_loss + kl_loss)
    return autosummary('Loss/fac_vae_loss', elbo + hy_gamma * tc_loss)
def factor_vae_D(E, D, opt, training_set, minibatch_size, reals, labels,
                 latent_type='normal'):
    """FactorVAE discriminator loss on sampled versus shuffled codes.

    The loss is `-(0.5 * log(p[:, 0]) + 0.5 * log(p_shuffled[:, 1]))`, where
    `p` and `p_shuffled` are the probabilities `D` outputs for the sampled
    and the shuffled codes. No reduction over the batch is applied here.

    Args:
        E: Encoder network; its output yields `(means, log_var)`.
        D: Discriminator on latent codes; its output yields
            `(logits, probs)`.
        opt: Unused.
        training_set: Unused.
        minibatch_size: Unused here.
        reals: Real images fed to the encoder.
        labels: Labels passed to `E`.
        latent_type: Unused here.

    Returns:
        The loss tensor (after `autosummary` logging).
    """
    del opt, training_set  # Unused.
    means, log_var = get_return_v(
        E.get_output_for(reals, labels, is_training=True), 2)
    z = sample_from_latent_distribution(means, log_var)
    z_shuffled = shuffle_codes(z)

    _, probs = get_return_v(D.get_output_for(z, is_training=True), 2)
    _, probs_shuffled = get_return_v(
        D.get_output_for(z_shuffled, is_training=True), 2)

    log_p_sampled = tf.log(probs[:, 0])
    log_p_shuffled = tf.log(probs_shuffled[:, 1])
    loss = -(0.5 * log_p_sampled + 0.5 * log_p_shuffled)
    return autosummary('Loss/fac_vae_discr_loss', loss)
def _generate_training_sample(self, representation_model, batch_size,
                              random_state, global_variances, active_dims):
    """Produce one (fixed factor index, least-varying code dimension) pair.

    A factor index is drawn at random, one mini-batch of factors is sampled
    and the chosen factor is fixed to the value of the first row. The
    corresponding observations are encoded, and the active dimension with
    the smallest ratio of local to global variance is returned.

    Args:
        representation_model: Network object whose `run` method maps
            observations to representations.
        batch_size: Number of factor vectors to sample.
        random_state: Numpy random state used for randomness.
        global_variances: Per-dimension variances used for normalisation.
        active_dims: Index/mask selecting the dimensions to consider.

    Returns:
        Tuple `(factor_index, argmin)`; `argmin` indexes into the selected
        (active) dimensions.
    """
    data = self.ground_truth_data
    # Pick the factor that stays constant across the mini-batch.
    factor_index = random_state.randint(data.num_factors)
    factors = data.sample_factors(batch_size, random_state)
    factors[:, factor_index] = factors[0, factor_index]

    observations = data.sample_observations_from_factors(factors,
                                                         random_state)
    observations = misc.adjust_dynamic_range(observations, [-1., 1.],
                                             self.drange_net)
    if self.has_label_place:
        # Feed a zero-width label array with one row per observation.
        empty_labels = np.zeros([observations.shape[0], 0])
        outputs = representation_model.run(observations, empty_labels,
                                           is_validation=True)
    else:
        outputs = representation_model.run(observations, is_validation=True)
    representations = get_return_v(outputs, 1)

    local_variances = np.var(representations, axis=0, ddof=1)
    normalised = local_variances[active_dims] / global_variances[active_dims]
    return factor_index, np.argmin(normalised)
def torgb(x, y, res):
    """Map features to image channels and add them to a running image.

    Relies on names from the enclosing scope (`latent_split_ls_for_std_gen`,
    `latents_ready_ls`, `return_atts`, `resolution`, `n_subs`, `kwargs`,
    `num_channels`).

    Args:
        x: Input feature tensor.
        y: Image accumulated so far, or None for the first call.
        res: Resolution level (2..resolution_log2 per the original note);
            `res * 2 - 3` selects the latent split and ready latents.

    Returns:
        Tuple `(image, atts)`, where `image` is the new image when `y` is
        None and `y` plus the new image otherwise.
    """
    latent_idx = res * 2 - 3
    with tf.variable_scope('ToRGB'):
        modulated = build_C_spgroup_layers_with_latents_ready(
            x,
            'SP_latents',
            latent_split_ls_for_std_gen[latent_idx],
            latent_idx,
            latents_ready_ls[latent_idx],
            return_atts=return_atts,
            resolution=resolution,
            n_subs=n_subs,
            **kwargs)
        t, atts = get_return_v(modulated, 2)
        t = apply_bias_act(conv2d_layer(t, fmaps=num_channels, kernel=1))
        if y is None:
            return t, atts
        return y + t, atts
def build_trans_pos_to_mask_layer(x,
                                  name,
                                  n_layers,
                                  scope_idx,
                                  is_training,
                                  wh,
                                  n_subs,
                                  resolution=128,
                                  trans_dim=512,
                                  dff=512,
                                  trans_rate=0.1,
                                  **kwargs):
    '''
    Build the pos_to_mask transformer that predicts semantic variation
    masks from a learned positional constant.

    x is only used for its last (static) dimension, its dtype and its
    dynamic batch size.
    Returns (y, atts): y holds the masks reshaped to [-1, n_masks, wh * wh],
    atts holds them resized to [-1, n_masks, 1, resolution, resolution].
    '''
    with tf.variable_scope(name + '-' + str(scope_idx)):
        with tf.variable_scope('PosConstant'):
            n_masks = x.get_shape().as_list()[-1]
            pos_const = tf.get_variable(
                'const',
                shape=[1, n_masks, trans_dim],
                initializer=tf.initializers.random_normal())
            # Repeat the constant for every sample in the batch.
            batch_multiples = [tf.shape(x)[0], 1, 1]
            pos = tf.tile(tf.cast(pos_const, x.dtype), batch_multiples)

        with tf.variable_scope('MaskEncoding'):
            encoded = trans_encoder_basic(pos,
                                          is_training,
                                          None,
                                          n_layers,
                                          trans_dim,
                                          num_heads=8,
                                          dff=dff,
                                          rate=trans_rate)
            mask_logits = get_return_v(encoded, 1)  # (b, z_dim, d_model)

        with tf.variable_scope('MaskMapping'):
            # [b, n_masks, h, w]
            atts = sc_masks(mask_logits, n_masks, n_subs, wh)
            y = tf.reshape(atts, [-1, n_masks, wh * wh])

        with tf.variable_scope('ReshapeAttns'):
            atts = tf.reshape(atts, [-1, wh, wh, 1])
            atts = tf.image.resize(atts, size=(resolution, resolution))
            atts = tf.reshape(atts, [-1, n_masks, 1, resolution, resolution])
        return y, atts
def generate_attr_dataset(network_pkl,
                          n_data_samples,
                          start_seed,
                          resolution,
                          run_batch,
                          used_semantics_ls,
                          attr2idx_dict,
                          create_new_G,
                          new_func_name,
                          truncation_psi=0.5):
    '''
    Generate images plus the latent values of selected semantics.

    Images are written to the run directory as seedNNNNNNN.png and the
    selected latent columns are pickled to attrs.pkl as
    {'names': used_semantics_ls, 'data': array}.

    network_pkl: path/URL of the network pickle; it is unpacked as four
        networks and the last one is used as generator.
    n_data_samples: total number of samples to generate (must be > 0).
    start_seed: first seed; each batch is seeded with its first sample index.
    resolution: images are average-pooled when the generator output is at
        least twice this size.
    run_batch: number of samples generated per generator call (must be > 0).
    used_semantics_ls: ['azimuth', 'haircolor', ...]
    attr2idx_dict: {'azimuth': 10, 'haircolor': 17, 'smile': 6, ...}
    create_new_G: if truthy, the generator is rebuilt with
        Gs.convert(new_func_name=new_func_name).
    truncation_psi: forwarded to truncated_z_sample as `truncation`.

    Raises ValueError if n_data_samples or run_batch is not positive.
    '''
    # Fail early with a clear message; previously these cases surfaced as an
    # opaque range()/np.concatenate error after the networks were loaded.
    if n_data_samples <= 0:
        raise ValueError(
            'n_data_samples must be positive, got %r' % (n_data_samples,))
    if run_batch <= 0:
        raise ValueError('run_batch must be positive, got %r' % (run_batch,))

    tflib.init_tf()
    print('Loading networks from "%s"...' % network_pkl)
    _G, _D, _I, Gs = misc.load_pkl(network_pkl)
    if create_new_G:
        Gs = Gs.convert(new_func_name=new_func_name)

    idxes = [attr2idx_dict[name] for name in used_semantics_ls]
    end_seed = start_seed + n_data_samples
    attr_ls = []
    for seed in range(start_seed, end_seed, run_batch):
        # The last batch may be smaller than run_batch.
        b = min(run_batch, end_seed - seed)
        Gs_kwargs = dnnlib.EasyDict(randomize_noise=True,
                                    minibatch_size=b,
                                    is_validation=True)
        z = truncated_z_sample(b,
                               Gs.input_shape[1],
                               truncation=truncation_psi,
                               seed=seed)
        images = get_return_v(Gs.run(z, None, **Gs_kwargs), 1)  # [b, c, h, w]

        shrink = Gs.output_shape[-1] // resolution
        if shrink > 1:
            # Average-pool with a shrink x shrink window.
            _, c, h, w = images.shape
            images = images.reshape(b, c, h // shrink, shrink, w // shrink,
                                    shrink).mean(5).mean(3)
        images = misc.adjust_dynamic_range(images, [-1, 1], [0, 255])
        images = np.transpose(images, [0, 2, 3, 1])
        images = np.rint(images).clip(0, 255).astype(np.uint8)
        for offset, image in enumerate(images):
            out_path = dnnlib.make_run_dir_path('seed%07d.png' %
                                                (seed + offset))
            PIL.Image.fromarray(image, 'RGB').save(out_path)
        attr_ls.append(z[:, idxes])

    attr = {
        'names': used_semantics_ls,
        'data': np.concatenate(attr_ls, axis=0),
    }
    with open(dnnlib.make_run_dir_path('attrs.pkl'), 'wb') as f:
        pickle.dump(attr, f)
def build_trans_z_to_mask_layer(x,
                                name,
                                n_layers,
                                scope_idx,
                                is_training,
                                wh,
                                n_subs,
                                resolution=128,
                                trans_dim=512,
                                dff=512,
                                trans_rate=0.1,
                                **kwargs):
    '''
    Build the z_to_mask transformer that predicts semantic variation masks
    from the latent values themselves.

    x gets a trailing axis appended before it is fed to the encoder.
    Returns (y, atts): y holds the masks reshaped to [-1, n_masks, wh * wh],
    atts holds them resized to [-1, n_masks, 1, resolution, resolution].
    '''
    with tf.variable_scope(name + '-' + str(scope_idx)):
        with tf.variable_scope('MaskEncoding'):
            x = x[:, :, np.newaxis]
            n_masks = x.get_shape().as_list()[-2]
            encoded = trans_encoder_basic(x,
                                          is_training,
                                          None,
                                          n_layers,
                                          trans_dim,
                                          num_heads=8,
                                          dff=dff,
                                          rate=trans_rate)
            mask_logits = get_return_v(encoded, 1)  # (b, z_dim, d_model)

        with tf.variable_scope('MaskMapping'):
            # [b, n_masks, h, w]
            atts = sc_masks(mask_logits, n_masks, n_subs, wh)
            y = tf.reshape(atts, [-1, n_masks, wh * wh])

        with tf.variable_scope('ReshapeAttns'):
            atts = tf.reshape(atts, [-1, wh, wh, 1])
            atts = tf.image.resize(atts, size=(resolution, resolution))
            atts = tf.reshape(atts, [-1, n_masks, 1, resolution, resolution])
        return y, atts
def group_norm_vae(E, G, opt, training_set, minibatch_size, reals, labels,
                   latent_type='normal', hy_beta=1, hy_hes=0, hy_commute=0,
                   recons_type='bernoulli_loss'):
    """VAE loss with a Lie-group norm term.

    The loss is `reconstruction + hy_beta * KL + lie_group_norm_loss`.

    Args:
        E: Encoder network; its output yields `(means, log_var)`.
        G: Generator network whose output yields eight values.
        opt: Unused.
        training_set: Unused.
        minibatch_size: Forwarded to `make_lie_group_norm_loss`.
        reals: Real images fed to the encoder.
        labels: Labels passed to `E` and `G`.
        latent_type: Unused here.
        hy_beta: Weight of the KL term.
        hy_hes, hy_commute: Forwarded to `make_lie_group_norm_loss`.
        recons_type: Forwarded to `make_reconstruction_loss`.

    Returns:
        The loss tensor (after `autosummary` logging).
    """
    del opt, training_set  # Unused.
    means, log_var = get_return_v(
        E.get_output_for(reals, labels, is_training=True), 2)
    kl_loss = autosummary('Loss/kl_loss', compute_gaussian_kl(means, log_var))

    z = sample_from_latent_distribution(means, log_var)
    # The last generator output (lie_vars) is not used by this loss.
    (recons, group_feats_G, _, _, lie_alg_feats,
     lie_alg_basis_norm, _, _) = get_return_v(
         G.get_output_for(z, labels, is_training=True), 8)

    lie_group_loss = make_lie_group_norm_loss(
        group_feats_G=group_feats_G,
        lie_alg_feats=lie_alg_feats,
        lie_alg_basis_norm=lie_alg_basis_norm,
        minibatch_size=minibatch_size,
        hy_hes=hy_hes,
        hy_commute=hy_commute)

    recons_loss = make_reconstruction_loss(reals, recons,
                                           recons_type=recons_type)
    recons_loss = autosummary('Loss/recons_loss', recons_loss)

    elbo = autosummary('Loss/elbo', recons_loss + hy_beta * kl_loss)
    return autosummary('Loss/loss', elbo + lie_group_loss)
def layer(x, layer_idx, fmaps, kernel, up=False):
    '''
    One synthesis layer: latent-conditioned SP-group modulation, convolution,
    additive noise, then bias + activation.

    Reads `latent_split_ls_for_std_gen`, `latents_ready_ls`, `return_atts`,
    `resolution`, `n_subs`, `kwargs`, `randomize_noise`, `noise_inputs` and
    `act` from the enclosing scope.

    Args:
        x: input feature tensor.
        layer_idx: index selecting this layer's latent split, ready latents
            and (when noise is not randomized) its noise input.
        fmaps, kernel, up: forwarded to `conv2d_layer`.

    Returns:
        (x, atts): the activated features and the second value returned by
        `build_C_spgroup_layers_with_latents_ready`.
    '''
    spgroup_out = build_C_spgroup_layers_with_latents_ready(
        x,
        'SP_latents',
        latent_split_ls_for_std_gen[layer_idx],
        layer_idx,
        latents_ready_ls[layer_idx],
        return_atts=return_atts,
        resolution=resolution,
        n_subs=n_subs,
        **kwargs)
    x, atts = get_return_v(spgroup_out, 2)
    x = conv2d_layer(x, fmaps=fmaps, kernel=kernel, up=up)

    if not randomize_noise:
        # Reuse the fixed noise input registered for this layer.
        noise_map = tf.cast(noise_inputs[layer_idx], x.dtype)
    else:
        # Fresh noise with a size-1 second axis, broadcast over that axis.
        noise_shape = [tf.shape(x)[0], 1, x.shape[2], x.shape[3]]
        noise_map = tf.random_normal(noise_shape, dtype=x.dtype)

    # Scalar variable initialised to zero that scales the injected noise.
    strength = tf.get_variable('noise_strength',
                               shape=[],
                               initializer=tf.initializers.zeros())
    x = x + noise_map * tf.cast(strength, x.dtype)
    activated = apply_bias_act(x, act=act)
    return activated, atts
def _evaluate(self, Gs, Gs_kwargs, num_gpus, **kwargs):
    '''
    Compute the FID between real and generated images and report it.

    Real-image statistics are loaded from a cache file when it exists and
    are otherwise computed and saved there. Fake-image statistics are always
    recomputed from `Gs`.

    Args:
        Gs: generator network; used directly on every GPU (not cloned).
        Gs_kwargs: extra keyword arguments for `Gs.get_output_for`.
        num_gpus: number of GPUs to build the evaluation graph on.
        **kwargs: accepted but not used here.

    Returns:
        None. The real part of the distance goes to `self._report_result`.
    '''
    minibatch_size = num_gpus * self.minibatch_per_gpu
    # NOTE(review): this unpickles a network fetched over HTTP; only safe as
    # long as that host is trusted.
    inception = misc.load_pkl(
        'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/inception_v3_features.pkl'
    )
    # One feature buffer, filled first with reals (if needed), then fakes.
    feats = np.empty([self.num_images, inception.output_shape[1]],
                     dtype=np.float32)

    # Calculate statistics for reals.
    cache_file = self._get_cache_file_for_reals(num_images=self.num_images)
    os.makedirs(os.path.dirname(cache_file), exist_ok=True)
    if not os.path.isfile(cache_file):
        real_batches = self._iterate_reals(minibatch_size=minibatch_size)
        for batch_idx, real_batch in enumerate(real_batches):
            lo = batch_idx * minibatch_size
            hi = min(lo + minibatch_size, self.num_images)
            if real_batch.shape[1] == 1:
                # Repeat a single channel three times.
                real_batch = np.tile(real_batch, [1, 3, 1, 1])
            feats[lo:hi] = inception.run(real_batch[:hi - lo],
                                         num_gpus=num_gpus,
                                         assume_frozen=True)
            if hi == self.num_images:
                break
        mu_real = np.mean(feats, axis=0)
        sigma_real = np.cov(feats, rowvar=False)
        misc.save_pkl((mu_real, sigma_real), cache_file)
    else:
        mu_real, sigma_real = misc.load_pkl(cache_file)

    # Construct TensorFlow graph.
    per_gpu_feats = []
    for gpu_idx in range(num_gpus):
        with tf.device('/gpu:%d' % gpu_idx):
            generator = Gs  # the same network object on every device
            inception_clone = inception.clone()
            latents = tf.random_normal([self.minibatch_per_gpu] +
                                       generator.input_shape[1:])
            labels = self._get_random_labels_tf(self.minibatch_per_gpu)
            gen_out = generator.get_output_for(latents, labels, **Gs_kwargs)
            fakes = get_return_v(gen_out, 1)
            fakes = tflib.convert_images_to_uint8(fakes)
            if fakes.get_shape().as_list()[1] == 1:
                fakes = tf.tile(fakes, [1, 3, 1, 1])
            per_gpu_feats.append(inception_clone.get_output_for(fakes))

    # Calculate statistics for fakes.
    for lo in range(0, self.num_images, minibatch_size):
        self._report_progress(lo, self.num_images)
        hi = min(lo + minibatch_size, self.num_images)
        gathered = np.concatenate(tflib.run(per_gpu_feats), axis=0)
        feats[lo:hi] = gathered[:hi - lo]
    mu_fake = np.mean(feats, axis=0)
    sigma_fake = np.cov(feats, rowvar=False)

    # Calculate FID.
    mean_term = np.square(mu_fake - mu_real).sum()
    cov_sqrt, _ = scipy.linalg.sqrtm(np.dot(sigma_fake, sigma_real),
                                     disp=False)  # pylint: disable=no-member
    fid = mean_term + np.trace(sigma_fake + sigma_real - 2 * cov_sqrt)
    self._report_result(np.real(fid))
def G_logistic_ns_ps_sc(G, D, I, opt, training_set, minibatch_size,
                        I_info=None, latent_type='uniform', C_lambda=1,
                        epsilon=0.4, random_eps=False, use_cascade=False,
                        cascade_dim=None, group_recons_lambda=0.):
    '''
    Non-saturating logistic generator loss plus the `calc_ps_loss` term
    computed from the outputs of `I`.

    A batch of latents and a copy shifted along one latent dimension are
    pushed through G together. D scores the unshifted half, I is run on both
    halves, and the value from `calc_ps_loss` is added to the generator loss.
    When `group_recons_lambda > 0`, a group-feature reconstruction term from
    `recons_group` is added as well.

    Args:
        G: generator; its first three outputs are read as
            (images, <ignored>, group_feat).
        D: discriminator; must return (scores, hidden) when `I_info` is
            not None, and scores alone otherwise.
        I: network applied to the generated images of both halves.
        opt: unused.
        training_set: provides `get_random_labels_tf`.
        minibatch_size: number of latent pairs.
        I_info: only checked against None to pick D's return form.
        latent_type: 'uniform' (range [-2, 2)) or 'normal'.
        C_lambda: forwarded to `calc_ps_loss`.
        epsilon: size of the shift along the chosen dimension; with
            `random_eps` it scales a per-sample normal draw (stddev 2.0).
        random_eps: draw a random shift per sample instead of a fixed one.
        use_cascade: shift along `cascade_dim` for every sample instead of
            a uniformly sampled dimension per sample.
        cascade_dim: dimension index used when `use_cascade` is set.
        group_recons_lambda: weight of the group reconstruction term;
            the term is skipped when not > 0.

    Returns:
        (G_loss, None)

    Raises:
        ValueError: if `latent_type` is neither 'uniform' nor 'normal'.
    '''
    _ = opt
    C_global_size = G.input_shapes[0][1]
    latent_shape = [minibatch_size, C_global_size]
    if latent_type == 'uniform':
        latents = tf.random.uniform(latent_shape, minval=-2, maxval=2)
    elif latent_type == 'normal':
        latents = tf.random.normal(latent_shape)
    else:
        raise ValueError('Latent type not supported: ' + latent_type)

    # Sample delta latents: a one-hot row per sample marking the dimension
    # that gets shifted.
    if use_cascade:
        C_delta_latents = tf.cast(tf.one_hot(cascade_dim, C_global_size),
                                  latents.dtype)
        C_delta_latents = tf.tile(C_delta_latents[tf.newaxis, :],
                                  [minibatch_size, 1])
        print('after onehot, C_delta_latents.shape:',
              C_delta_latents.get_shape().as_list())
    else:
        C_delta_latents = tf.random.uniform([minibatch_size],
                                            minval=0,
                                            maxval=C_global_size,
                                            dtype=tf.int32)
        C_delta_latents = tf.cast(tf.one_hot(C_delta_latents, C_global_size),
                                  latents.dtype)

    if random_eps:
        epsilon = epsilon * tf.random.normal(
            [minibatch_size, 1], mean=0.0, stddev=2.0)
    delta_target = C_delta_latents * epsilon
    delta_latents = delta_target + latents

    # Labels are sampled for the concatenated batch. Their first
    # `minibatch_size` rows line up with the unshifted latents, so any call
    # that only sees that half must receive only those rows; passing the full
    # tensor would give images and labels different batch sizes.
    labels = training_set.get_random_labels_tf(2 * minibatch_size)
    labels_first_half = labels[:minibatch_size]

    latents_all = tf.concat([latents, delta_latents], axis=0)
    fake_all_out, _, group_feat = get_return_v(
        G.get_output_for(latents_all,
                         labels,
                         is_training=True,
                         return_atts=False), 3)
    fake1_out, _ = tf.split(fake_all_out, 2, axis=0)
    group_feat1, _ = tf.split(group_feat, 2, axis=0)

    if I_info is not None:
        fake_scores_out, _ = D.get_output_for(fake1_out,
                                              labels_first_half,
                                              is_training=True)
    else:
        fake_scores_out = D.get_output_for(fake1_out,
                                           labels_first_half,
                                           is_training=True)
    G_loss = tf.nn.softplus(-fake_scores_out)  # -log(sigmoid(fake_scores_out))

    regress_out = I.get_output_for(fake_all_out, is_training=True)
    reg1_out, reg2_out = tf.split(regress_out, 2, axis=0)
    I_loss = calc_ps_loss(latents, delta_latents, reg1_out, reg2_out,
                          C_delta_latents, C_lambda)
    I_loss = autosummary('Loss/I_loss', I_loss)
    G_loss += I_loss

    if group_recons_lambda > 0:
        # Regenerate the unshifted half with ncut_maxval=1 and compare its
        # group features with those from the main pass.
        _, _, group_feat_intanct = get_return_v(
            G.get_output_for(latents,
                             labels_first_half,
                             is_training=True,
                             return_atts=False,
                             ncut_maxval=1), 3)
        loss_group_recons = recons_group(group_feat1, group_feat_intanct)
        G_loss += group_recons_lambda * loss_group_recons
    return G_loss, None