def _plot_epoch_samples(self, generator, discriminator):
    """Generate low-res/high-res sample pairs for the epoch inputs and plot them.

    Every batch of ``self.epoch_sample_input`` is passed through
    ``self._low_res_generator`` and then through ``generator``; the
    discriminator output on the high-res result is squashed with
    ``logistic``. Each sample occupies two neighbouring subplot cells:
    the low-res image (titled with its size) and the high-res image
    (titled with its size and the discriminator score).

    Returns
    -------
    ((hires_samples, lowres_samples), predictions) : concatenated tensors
        over all batches.
    """
    hires_batches, lowres_batches, prediction_batches = [], [], []
    batched_inputs = tf.data.Dataset.from_tensor_slices(
        self.epoch_sample_input).batch(self._config.batch_size)
    for input_batch in batched_inputs:
        lowres_batch = self._low_res_generator(input_batch, training=True)
        hires_batch = generator(lowres_batch, training=True)
        score_batch = logistic(discriminator(hires_batch, training=True))
        hires_batches.append(hires_batch)
        lowres_batches.append(lowres_batch)
        prediction_batches.append(score_batch)

    hires_samples = tf.concat(hires_batches, axis=0)
    lowres_samples = tf.concat(lowres_batches, axis=0)
    predictions = tf.concat(prediction_batches, axis=0)

    # Two plot cells per sample, so the grid is twice as wide.
    grid_rows = self._epoch_images_shape[0]
    grid_cols = self._epoch_images_shape[1] * 2
    lowres_title = "{}x{}".format(lowres_samples.shape[1],
                                  lowres_samples.shape[2])
    for sample_index in range(hires_samples.shape[0]):
        left_cell = 2 * sample_index + 1
        plt.subplot(grid_rows, grid_cols, left_cell)
        Evaluation.plot_image(lowres_samples[sample_index], lowres_title)

        plt.subplot(grid_rows, grid_cols, left_cell + 1)
        hires_title = "{}x{}: {:.5f}".format(
            hires_samples.shape[1], hires_samples.shape[2],
            predictions[sample_index].numpy())
        Evaluation.plot_image(hires_samples[sample_index], hires_title)
    return (hires_samples, lowres_samples), predictions
def gibbs(self, iters, x=None):
    """
    Perform iters iterations of gibbs sampling, and return result.

    Parameters
    ----------
    iters : number
        How many sampling sweeps to run.
    x : tensor of length n, optional
        Starting state. When omitted, a random 0/1 state is drawn.
        A supplied tensor is updated in place and returned.

    Returns
    -------
    x : the state after the last sweep.
    """
    adj, local, row, col, n = self.adj, self.local, self.row, self.col, self.n

    # Start from a random binary state when none was supplied
    if x is None:
        x = torch.round(torch.rand(n, device=self.device))

    remaining = iters
    while remaining > 0:
        # Per-unit sum of adj * x[col], scattered by row index, plus local term
        field = torch.zeros(n, device=self.device).index_add_(
            0, row, adj * x[col]) + local
        prob = logistic(x * field)
        threshold = torch.rand(n, device=self.device)
        # Units keep their old value when prob == threshold exactly
        x[prob > threshold] = 1
        x[prob < threshold] = 0
        remaining -= 1

    return x
def free_mp(self):
    """ Unclamped message-passing for one mini-batch.

    Iterates damped message updates until the largest absolute change
    between successive message vectors is at most ``self.eps`` or
    ``self.max_iters`` iterations have run (at least one iteration always
    runs). The final messages are stored in ``self.message_free``.

    Returns
    -------
    ``logistic`` of the per-unit sum of final messages (scattered by
    ``row``) plus ``local``.
    """
    adj = self.adj
    local = self.local
    row = self.row
    col= self.col
    n = self.n
    # One message per entry of adj
    m = adj.shape[0]
    # Initialize to zeros because using log ratio
    message_old = torch.zeros(m, device=self.device)
    message_new = torch.zeros(m, device=self.device)
    iters = 0
    # `iters == 0` forces the first pass, where old and new are both zero
    while iters == 0 or (iters < self.max_iters and torch.max(torch.abs(message_new - message_old)) > self.eps):
        message_old = message_new.clone()
        # Sum the messages sharing a row index, subtract each message's own
        # contribution, then reorder by r2c
        # (presumably r2c maps each edge to its reverse edge -- TODO confirm)
        message = (torch.zeros(n, device=self.device).index_add_(0, row, message_old)[row] - message_old)[self.r2c]
        # NOTE(review): the in-place += / -= below rely on local[row] being a
        # fresh copy (true for tensor indexing) -- confirm `row` is a tensor
        message_new = local[row]
        # For some reason samples from model never explode
        message_new += torch.log(torch.exp(local[col] + adj + message) + 1)
        message_new -= torch.log(torch.exp(local[col] + message) + 1)
        # Replace non-finite entries (inf or nan) with the matching adj value
        message_new[torch.isinf(message_new) + torch.isnan(message_new)] = adj[torch.isinf(message_new) + torch.isnan(message_new)].clone()
        # Damping: blend the new messages with the previous ones
        message_new = message_new * (1 - self.damping) + message_old * self.damping
        iters+=1
    # NOTE(review): the label says "minimum difference" but the printed value
    # is the maximum absolute difference of the last iteration
    print(" %d iterations until convergence, minimum difference %.3e" % (iters, torch.max(torch.abs(message_new - message_old))))
    self.message_free = message_new
    return logistic(torch.zeros(n, device=self.device).index_add_(0, row, message_new) + local)
def _plot_epoch_samples(self, generator, discriminator):
    """Generate samples for the epoch inputs and plot input/output triples.

    Each batch of ``self.epoch_sample_input`` is run through ``generator``.
    The discriminator sees either the generated images alone or, when
    ``self._config.conditioned_discriminator`` is set, the input concatenated
    with the generated images along the last axis. Three cells are plotted
    per sample: all-but-last input channels, the last input channel, and the
    generated image titled with its rounded discriminator score.

    Returns
    -------
    (samples, predictions) : concatenated tensors over all batches.
    """
    sample_batches = []
    prediction_batches = []
    batched_inputs = tf.data.Dataset.from_tensor_slices(
        self.epoch_sample_input).batch(self._config.batch_size)
    for input_batch in batched_inputs:
        generated = generator(input_batch, training=True)
        if self._config.conditioned_discriminator:
            disc_input = tf.concat([input_batch, generated], axis=-1)
        else:
            disc_input = generated
        scores = logistic(discriminator(disc_input, training=True))
        sample_batches.append(generated)
        prediction_batches.append(scores)

    samples = tf.concat(sample_batches, axis=0)
    predictions = tf.concat(prediction_batches, axis=0)

    images_per_sample = 3
    grid_rows = self._epoch_images_shape[0]
    grid_cols = self._epoch_images_shape[1] * images_per_sample
    for sample_index in range(self.epoch_sample_input.shape[0]):
        first_cell = sample_index * images_per_sample + 1
        source = self.epoch_sample_input[sample_index]
        # NOTE: this assumes colored+B/W -> colored
        plt.subplot(grid_rows, grid_cols, first_cell)
        Evaluation.plot_image(source[:, :, :-1])
        plt.subplot(grid_rows, grid_cols, first_cell + 1)
        Evaluation.plot_image(source[:, :, -1:])
        plt.subplot(grid_rows, grid_cols, first_cell + 2)
        Evaluation.plot_image(samples[sample_index],
                              np.round(predictions[sample_index].numpy(), 5))
    return samples, predictions
def compute_out_of_distribution_score(model_folder, df, num_classes, batch_size=32, temperature=2, magnitude=0.0002, delta=0.90385):
    """Score every image in ``df`` for being out of distribution.

    Loads ``DenseNet201_best_balanced_acc.hdf5`` from ``model_folder``,
    perturbs each batch of images against the sign of the values returned by
    the perturbation helper (scaled by ``magnitude``), and records the
    maximum softmax probability of the temperature-scaled outputs on the
    perturbed images.

    Parameters
    ----------
    model_folder : directory containing the saved model file.
    df : DataFrame with at least the columns ``path`` and ``image``.
    num_classes : passed to ``balanced_accuracy`` and the perturbation helper.
    batch_size : images per step.
    temperature : passed to the perturbation helper.
    magnitude : scale of the perturbation subtracted from the images.
    delta, and a fixed k=20 : ``x0`` and ``k`` of the final ``logistic`` call.

    Returns
    -------
    DataFrame with columns ``image``, ``softmax_score`` and
    ``out_dist_score`` (``1 - logistic(softmax_score, x0=delta, k=20)``).
    """
    weights_path = os.path.join(model_folder,
                                'DenseNet201_best_balanced_acc.hdf5')
    print('Loading model: ', weights_path)
    model = load_model(
        filepath=weights_path,
        custom_objects={'balanced_accuracy': balanced_accuracy(num_classes)})

    data_format = K.image_data_format()
    densenet_params = get_transfer_model_param_map()['DenseNet201']
    val_pipeline = LesionClassifier.create_aug_pipeline_val(
        densenet_params.input_size)
    image_batches = ImageIterator(
        image_paths=df['path'].tolist(),
        labels=None,
        augmentation_pipeline=val_pipeline,
        preprocessing_function=densenet_params.preprocessing_func,
        batch_size=batch_size,
        shuffle=False,
        rescale=None,
        pregen_augmented_images=False,
        data_format=data_format)

    compute_perturbations, get_scaled_dense_pred_output = \
        get_perturbation_helper_func(model, temperature, num_classes)

    df_score = df[['image']].copy()
    max_softmax = []
    test_phase = 0  # 0 = test, 1 = train
    n_steps = math.ceil(df.shape[0] / batch_size)
    for _ in trange(n_steps):
        images = next(image_batches)
        raw = compute_perturbations([images, test_phase])[0]
        # Keep only the sign, then apply the DenseNet201 normalization
        signed = norm_perturbations(np.sign(raw), data_format)
        perturbed_images = images - magnitude * signed
        # Confidence of the model on the perturbed images
        logits = get_scaled_dense_pred_output(
            [perturbed_images, test_phase])[0]
        probabilities = softmax(logits)
        max_softmax.extend(np.max(probabilities, axis=-1).tolist())

    del model
    K.clear_session()

    df_score['softmax_score'] = max_softmax
    df_score['out_dist_score'] = 1 - logistic(
        x=df_score['softmax_score'], x0=delta, k=20)
    return df_score
def add_noise(p_dict, noise, common=True):
    """Perturb probabilities with Gaussian noise in log-odds space.

    Each probability is mapped to its log-odds, shifted by
    ``noise * N(0, 1)`` and mapped back with ``logistic``.

    Parameters
    ----------
    p_dict : dict mapping a key to a probability.
    noise : scale of the standard-normal shift.
    common : when True a single shift is drawn up front and shared by all
        keys; otherwise a fresh shift is drawn for every key.

    Returns
    -------
    dict with the same keys and the perturbed probabilities.
    """
    # The shared shift is drawn once, before any key is processed
    shared_shift = noise * np.random.normal(0.0, 1.0) if common else None

    noisy = {}
    for name, prob in p_dict.items():
        log_odds = np.log(prob / (1.0 - prob))
        if common:
            shift = shared_shift
        else:
            shift = noise * np.random.normal(0.0, 1.0)
        noisy[name] = logistic(log_odds + shift)
    return noisy
def predict(self, X):
    """Classify the rows of X as +1 or -1.

    Parameters
    ----------
    X : array-like whose rows are multiplied with ``self.w``.

    Returns
    -------
    int32 array of length ``len(X)``: -1 where ``logistic(X @ w)`` is below
    ``self.threshold``, +1 elsewhere.

    Raises
    ------
    ValueError
        If the model has no ``w`` attribute yet (it was never trained).
    """
    # Only a missing attribute means "untrained"; a bare except would also
    # swallow KeyboardInterrupt/SystemExit and unrelated errors.
    try:
        weights = self.w
    except AttributeError:
        raise ValueError("Model must be trained before test!")
    lscore = logistic(np.matmul(X, weights))
    res = np.ones(len(X), dtype=np.int32)
    res[lscore < self.threshold] = -1
    return res
def plot_probability_map(discriminator, axis, tile_size=1.):
    """Draw the discriminator's output over a 2-D grid as a colour mesh.

    A grid covering the enclosing-scope limits ``x_lim`` / ``y_lim`` (padded)
    is built with spacing ``tile_size``; every grid point is mapped back with
    ``pca.inverse_transform`` into a ``dims``-dimensional input, scored by the
    discriminator, squashed with ``logistic`` and drawn on ``axis``.

    Parameters
    ----------
    discriminator : callable accepting ``(tensor, training=False)``.
    axis : object providing ``pcolormesh``.
    tile_size : grid spacing.
    """
    # pcolormesh seems to skip the last row/col so add a dummy column;
    # also add tolerance before casting to int
    x_low, x_high = int(x_lim[0] - 1), int(x_lim[1] + 2)
    y_low, y_high = int(y_lim[0] - 1), int(y_lim[1] + 2)
    n_x = int((x_high - x_low + 1) / tile_size)
    n_y = int((y_high - y_low + 1) / tile_size)

    x_ticks = np.arange(x_low, x_high + 1, tile_size)
    y_ticks = np.arange(y_high, y_low - 1, -tile_size)
    xx = np.tile(x_ticks, n_y).reshape(n_y, -1)
    yy = np.tile(y_ticks, n_x).reshape(n_x, -1).T

    inputs = np.zeros(shape=(*xx.shape, dims), dtype=np.float32)
    for (r, c), x_value in np.ndenumerate(xx):
        grid_point = (x_value, yy[r, c] - tile_size)
        inputs[r, c, :] = pca.inverse_transform(grid_point)

    flat_inputs = tf.convert_to_tensor(inputs.reshape(-1, dims))
    scores = logistic(discriminator(flat_inputs, training=False))
    probability_map = tf.reshape(scores, shape=xx.shape)
    _ = axis.pcolormesh(xx - tile_size / 2.0, yy - tile_size / 2.0,
                        probability_map, cmap="coolwarm", alpha=0.5,
                        vmin=0, vmax=1)
def _forward_propagate(self, data):
    """Run ``data`` through the network, recording every layer's values.

    Returns
    -------
    (zs, activations) : ``zs`` holds the weighted input (weights . input +
    bias) of each of the ``N_layers - 1`` layers; ``activations`` starts
    with ``data`` and then holds the logistic transform of each entry of
    ``zs``.
    """
    weighted_inputs = []
    layer_outputs = [data]
    signal = data
    for layer in range(self.N_layers - 1):
        # weighted sum + bias, then the logistic transform
        weighted = numpy.dot(self.weights[layer], signal) + self.biases[layer]
        signal = utils.logistic(weighted)
        weighted_inputs.append(weighted)
        layer_outputs.append(signal)
    return weighted_inputs, layer_outputs
def gradient_one_sample(self, theta, xi, yi):
    """Gradient of the cost function (negative log likelihood) for one sample.

    Parameters
    ----------
    theta: array of shape [n_features].
        Point at which the gradient is evaluated.
    xi: array of shape [n_features].
        Training example.
    yi: scalar.
        Target class.

    Returns
    -------
    grad: array of shape [n_features].
        (logistic(theta . xi) - yi) * xi
    """
    predicted = logistic(np.dot(theta, xi))
    residual = predicted - yi
    return residual * xi
def process_images(current_input_names, current_second_input_names, current_target_names):
    """Score one set of images and append the results to ``disc_predictions``.

    Relies on names from the enclosing scope: ``args``, ``get_data_method``,
    ``generator``, ``discriminator`` and ``disc_predictions``. When
    ``generator`` is truthy the targets of each batch are replaced by
    generated images before they are scored.
    """
    data_set = get_data_method(args, current_input_names,
                               current_second_input_names,
                               current_target_names)
    expected_batches = len(current_input_names) // args.batch_size
    for inputs, targets in tqdm(data_set, total=expected_batches):
        if generator:
            targets = generator(inputs, training=True)
        # A conditioned discriminator also sees the inputs
        disc_input = (tf.concat([inputs, targets], axis=-1)
                      if args.conditioned_discriminator else targets)
        scores = logistic(discriminator(disc_input, training=True))
        disc_predictions.extend(scores)
    del data_set
def _plot_epoch_samples(self, generator, discriminator):
    """Generate samples for the epoch inputs and plot one image per sample.

    Each generated image is drawn into the ``self._epoch_images_shape`` grid
    and titled with its discriminator score (after ``logistic``), rounded to
    five decimals.

    Returns
    -------
    (samples, predictions) : concatenated tensors over all batches.
    """
    sample_batches, prediction_batches = [], []
    batched_inputs = tf.data.Dataset.from_tensor_slices(
        self.epoch_sample_input).batch(self._config.batch_size)
    for input_batch in batched_inputs:
        generated = generator(input_batch, training=True)
        scores = logistic(discriminator(generated, training=True))
        sample_batches.append(generated)
        prediction_batches.append(scores)

    samples = tf.concat(sample_batches, axis=0)
    predictions = tf.concat(prediction_batches, axis=0)

    for cell, sample_index in enumerate(range(samples.shape[0]), start=1):
        plt.subplot(*self._epoch_images_shape, cell)
        score_label = np.round(predictions[sample_index].numpy(), 5)
        Evaluation.plot_image(samples[sample_index], score_label)
    return samples, predictions
def _discriminate_data_set(self, discriminator, data_set):
    """Return the discriminator's logistic scores for every target in data_set.

    With ``self._config.online_augmentation`` each batch of targets is
    extended by its left-right, up-down and doubly flipped versions, and
    fresh normal noise of matching batch size replaces the inputs. A
    conditioned discriminator receives inputs and targets concatenated along
    the last axis. The discriminator runs with ``training=False``.

    Returns
    -------
    list with one score entry per (possibly augmented) target.
    """
    scores = []
    for inputs, targets in data_set:
        if self._config.online_augmentation:
            mirrored = tf.image.flip_left_right(targets)
            upside_down = tf.image.flip_up_down(targets)
            rotated = tf.image.flip_left_right(tf.image.flip_up_down(targets))
            targets = tf.concat([targets, mirrored, upside_down, rotated],
                                axis=0)
            inputs = tf.random_normal(
                [targets.shape[0], self._config.noise_dimensions])
        if self._config.conditioned_discriminator:
            disc_input = tf.concat([inputs, targets], axis=-1)
        else:
            disc_input = targets
        raw_scores = discriminator(disc_input, training=False)
        scores.extend(logistic(raw_scores))
    return scores
def inv_Hessian(self, theta, X, y):
    """Inverse of the hessian of the cost function (negative log likelihood).

    Parameters
    ----------
    theta: array of shape [n_features].
        Point at which the matrix is evaluated.
    X: array of shape [n_samples, n_features].
        Training samples.
    y: array of shape [n_samples].
        Targets; only their count matters here (samples are paired with
        targets, so iteration stops at the shorter of the two).

    Returns
    -------
    iHessian: array of shape [n_features, n_features].
        Inverse of the hessian matrix at theta.
    """
    size = self.n_features
    accumulated = np.zeros((size, size))
    for sample, _ in zip(X, y):
        prob = logistic(np.dot(theta, sample))
        # Per-sample contribution: p * (1 - p) * x x^T
        accumulated += prob * (1.0 - prob) * np.outer(sample, sample)
    return np.linalg.inv(accumulated)
def predict_proba(self, X):
    """Predicts probability (of class being = 1) from input X.

    Parameters
    ----------
    X: array of shape [n_samples, n_features].

    Returns
    -------
    y: array of shape [n_samples].
        Probability predictions for X, or None (after printing a warning)
        when the model has not been fit.
    """
    if not self.parameters_fit:
        # Parenthesised form works as a statement on Python 2 and as a
        # function call on Python 3; the bare statement is a SyntaxError on 3.
        print("WARNING: model not fit!")
        return None
    workingX = np.copy(X)
    n_samples, n_features = workingX.shape
    if self.fit_intercept:
        # Add intercept term: a leading column of ones
        workingX = np.insert(workingX, 0, np.ones(n_samples), 1)
    return logistic(np.dot(workingX, self.theta))
def generate_samples(generator, discriminator, args, sample_count):
    """Generate ``sample_count`` images and save them as PNGs.

    Does nothing (beyond logging) when ``args.samples_dir`` already exists.
    Otherwise the directory is created, normal noise is fed through
    ``generator`` in batches of ``args.batch_size`` (optionally truncated via
    ``truncate_input`` when ``args.truncation_threshold`` is set), every
    image is written to ``sample[_<description>]_<number>.png`` and the mean
    and standard deviation of the discriminator's logistic scores are logged.

    Parameters
    ----------
    generator, discriminator : callables applied to a batch.
    args : namespace providing ``samples_dir``, ``truncation_threshold``,
        ``noise_dimensions``, ``batch_size`` and ``description``.
    sample_count : number of images to generate.
    """
    if os.path.exists(args.samples_dir):
        tf.logging.info(
            "Skipping sample generation, directory '{}' exists already".format(
                args.samples_dir))
        return

    tf.logging.info("Generating {} samples in '{}'".format(
        sample_count, args.samples_dir))
    os.makedirs(args.samples_dir)
    if args.truncation_threshold:
        tf.logging.warning("Truncating samples to {}".format(
            args.truncation_threshold))

    discriminations = []
    image_number = 0
    data_set = tf.data.Dataset.from_tensor_slices(
        tf.random_normal([sample_count,
                          args.noise_dimensions])).batch(args.batch_size)
    # Ceiling division: the old `// batch_size + 1` announced one batch too
    # many whenever sample_count was a multiple of batch_size, so the
    # progress bar never reached its total.
    batch_count = (sample_count + args.batch_size - 1) // args.batch_size
    description_suffix = "_{}".format(
        args.description) if args.description else ""
    for batch in tqdm(data_set, total=batch_count):
        if args.truncation_threshold:
            batch = truncate_input(batch, args.truncation_threshold)
        images = generator(batch)
        discriminations.extend(logistic(discriminator(images)))
        for image in images:
            file_name = "sample{}_{:05d}.png".format(description_suffix,
                                                     image_number)
            imsave(os.path.join(args.samples_dir, file_name), image)
            image_number += 1
    # Internal sanity check: every requested sample was written
    assert image_number == sample_count
    tf.logging.warning("Discriminations: {:.5f}+-{:.5f}".format(
        np.mean(discriminations), np.std(discriminations)))
def train(self, epochs, metrics_writer):
    """Train generator and discriminator for ``epochs`` epochs.

    Per epoch: optional extra discriminator-only passes, one joint
    generator/discriminator update per batch of ``self.data_set``, an optional
    pass over ``self.extra_discriminator_data_set``, sample plotting,
    TensorBoard summaries, periodic checkpoints, periodic perceptual scores
    and train/test discriminator comparison. One row of metrics is written
    to ``metrics_writer`` at the end of every epoch.

    Parameters
    ----------
    epochs : number of epochs to run.
    metrics_writer : object with a ``writerow`` method.
    """
    try:
        tf.logging.info("Memory usage before training: {}".format(
            get_memory_usage_string()))
    except:  # pylint: disable=bare-except
        tf.logging.warning("Unable to get memory usage, no GPU available?")
    # NOTE(review): `fatal` appears to be used here only to make these
    # configuration notices stand out in the log -- confirm
    if self._config.online_augmentation:
        tf.logging.fatal("Performing online augmentation!")
    if self._config.train_disc_on_previous_images:
        # logging this here since for these it's implemented (unlike the two-way evaluation)
        tf.logging.fatal(
            "Also training discriminator on previously-generated images")
    # Generated images of the previous batch, reused as extra fake samples
    previous_generated_images = None

    checkpoint_interval = 25  # always have same interval for easier epoch number -> checkpoint-number conversion
    gradients_interval = epochs // 5 // 25 * 25  # aim for at least 5 gradients in total but have it a multiple of 25
    gradients_interval = 25 if gradients_interval == 0 else min(
        gradients_interval, 150)
    if self._config.scores_every_epoch:
        scores_interval = 1
    else:
        scores_interval = epochs // 10 // 10 * 10  # aim for at least 10 percentual scores in total but have it a multiple of 10
        scores_interval = 10 if scores_interval == 0 else min(
            scores_interval, 25)
    tf.logging.info(
        "Intervals: checkpoint {}, scores {}, gradients {}".format(
            checkpoint_interval, scores_interval, gradients_interval))
    for epoch in range(epochs):
        start = time.time()
        metrics = OneWayMetrics(epoch + 1, 4)

        # Optional extra discriminator-only pass over the whole data set
        if self._config.extra_disc_step_real or self._config.extra_disc_step_both:
            for batch_number, batch in enumerate(self.data_set):
                inputs, targets = batch
                if self._config.online_augmentation:
                    # Quadruple the batch with flipped copies and draw
                    # matching noise inputs
                    targets = tf.concat([
                        targets,
                        tf.image.flip_left_right(targets),
                        tf.image.flip_up_down(targets),
                        tf.image.flip_left_right(
                            tf.image.flip_up_down(targets))
                    ],
                                        axis=0)
                    inputs = tf.random_normal(
                        [targets.shape[0], self._config.noise_dimensions])
                with tf.GradientTape() as disc_tape:
                    disc_input_real = tf.concat(
                        [inputs, targets], axis=-1
                    ) if self._config.conditioned_discriminator else targets
                    disc_on_real = self._discriminator(disc_input_real,
                                                       training=True)
                    # Stays None unless the "both" variant is configured
                    disc_on_generated = None
                    if self._config.extra_disc_step_both:
                        generated_images = self._generator(inputs,
                                                           training=False)
                        disc_input_generated = tf.concat(
                            [inputs, generated_images], axis=-1
                        ) if self._config.conditioned_discriminator else generated_images
                        disc_on_generated = self._discriminator(
                            disc_input_generated, training=True)
                    disc_loss = self._model.disc_loss(
                        disc_on_real, disc_on_generated)
                gradients_of_discriminator = disc_tape.gradient(
                    disc_loss, self._discriminator.variables)
                self._discriminator_optimizer.apply_gradients(
                    zip(gradients_of_discriminator,
                        self._discriminator.variables))

        # Main pass: one generator and one discriminator update per batch
        for batch_number, batch in enumerate(self.data_set):
            inputs, targets = batch
            if self._config.online_augmentation:
                targets = tf.concat([
                    targets,
                    tf.image.flip_left_right(targets),
                    tf.image.flip_up_down(targets),
                    tf.image.flip_left_right(
                        tf.image.flip_up_down(targets))
                ],
                                    axis=0)
                inputs = tf.random_normal(
                    [targets.shape[0], self._config.noise_dimensions])

            # Extra discriminator step on the previous batch's generated images
            if self._config.train_disc_on_previous_images and previous_generated_images is not None:
                with tf.GradientTape() as disc_tape:
                    disc_on_generated = self._discriminator(
                        previous_generated_images, training=True)
                    disc_loss = self._model.disc_loss(
                        None, disc_on_generated)
                gradients_of_discriminator = disc_tape.gradient(
                    disc_loss, self._discriminator.variables)
                self._discriminator_optimizer.apply_gradients(
                    zip(gradients_of_discriminator,
                        self._discriminator.variables))

            with tf.GradientTape() as gen_tape, tf.GradientTape(
            ) as disc_tape:
                generated_images = self._generator(inputs, training=True)
                previous_generated_images = generated_images

                if self._config.real_image_noise_stdev:
                    # stddev = tf.random_uniform((1,), maxval=self._config.real_image_noise_stdev)
                    stddev = self._config.real_image_noise_stdev
                    # Add normal noise to the real images, clipped to [-1, 1]
                    targets = tf.minimum(
                        tf.maximum(
                            tf.add(
                                targets,
                                tf.random_normal(targets.shape,
                                                 mean=0.0,
                                                 stddev=stddev)), -1), 1)

                disc_input_real = tf.concat(
                    [inputs, targets], axis=-1
                ) if self._config.conditioned_discriminator else targets
                disc_input_generated = tf.concat(
                    [inputs, generated_images], axis=-1
                ) if self._config.conditioned_discriminator else generated_images
                disc_on_real = self._discriminator(disc_input_real,
                                                   training=True)
                disc_on_generated = self._discriminator(
                    disc_input_generated, training=True)

                # set up additional args for the generator loss calculation
                gen_loss_args = GeneratorLossArgs(generated_images,
                                                  inputs,
                                                  targets=targets)
                gen_losses = self._model.gen_loss(disc_on_generated,
                                                  gen_loss_args)
                # The generator loss is the sum of all its named components
                gen_loss = sum(gen_losses.values())
                disc_loss = self._model.disc_loss(disc_on_real,
                                                  disc_on_generated)

                metrics.add_losses(gen_losses, disc_loss)
                # Scores on generated images are cut to the real batch length
                metrics.add_discriminations(
                    logistic(disc_on_real),
                    logistic(disc_on_generated[:disc_on_real.shape[0]]))

            gradients_of_generator = gen_tape.gradient(
                gen_loss, self._generator.variables)
            gradients_of_discriminator = disc_tape.gradient(
                disc_loss, self._discriminator.variables)

            self._generator_optimizer.apply_gradients(
                zip(gradients_of_generator, self._generator.variables))
            self._discriminator_optimizer.apply_gradients(
                zip(gradients_of_discriminator,
                    self._discriminator.variables))

            if batch_number == 0:
                # work with the gradients of the first (rather than last) batch since here, the batch is full for sure
                for i, variable in enumerate(self._generator.variables):
                    if "batch_normalization" in variable.name or gradients_of_generator[
                            i] is None:
                        continue
                    tf.contrib.summary.histogram(
                        variable.name.replace(":", "_"),
                        gradients_of_generator[i], "gradients/gen", epoch)
                for i, variable in enumerate(
                        self._discriminator.variables):
                    if "batch_normalization" in variable.name or gradients_of_discriminator[
                            i] is None:
                        continue
                    tf.contrib.summary.histogram(
                        variable.name.replace(":", "_"),
                        gradients_of_discriminator[i], "gradients/disc",
                        epoch)

                if (epoch + 1
                    ) % gradients_interval == 0 or epoch == epochs - 1:
                    # NOTE(review): unlike the histogram loops above, these
                    # comprehensions do not skip None gradients, so .numpy()
                    # would fail on one -- confirm that cannot happen
                    generator_gradients = [(variable.name, gradients_of_generator[i].numpy()) \
                        for i, variable in enumerate(self._generator.variables) if "batch_normalization" not in variable.name]
                    discriminator_gradients = [(variable.name, gradients_of_discriminator[i].numpy()) \
                        for i, variable in enumerate(self._discriminator.variables) if "batch_normalization" not in variable.name]
                    with open(
                            os.path.join(
                                self._config.gradients_dir,
                                "gradients_at_epoch_{:04d}.pkl".format(
                                    epoch + 1)), "wb") as fh:
                        pickle.dump(
                            (generator_gradients, gen_loss.numpy(),
                             discriminator_gradients, disc_loss.numpy()),
                            fh)

        # Optional pass over additional real-only discriminator data
        if self.extra_discriminator_data_set:
            assert not self._config.conditioned_discriminator, "doesn't make sense - extra targets means 'too few inputs'"
            assert not self._config.train_disc_on_previous_images and \
                not self._config.real_image_noise_stdev, "not implemented"

            for disc_input in self.extra_discriminator_data_set:
                with tf.GradientTape() as disc_tape:
                    disc_on_real = self._discriminator(disc_input,
                                                       training=True)
                    disc_loss = self._model.disc_loss(
                        disc_on_real, [])  # no generated samples
                gradients_of_discriminator = disc_tape.gradient(
                    disc_loss, self._discriminator.variables)
                self._discriminator_optimizer.apply_gradients(
                    zip(gradients_of_discriminator,
                        self._discriminator.variables))

        # The last argument is True on every fifth epoch
        _ = self.save_epoch_samples(self._generator, self._discriminator,
                                    epoch + 1, (epoch + 1) % 5 == 0)

        tf.contrib.summary.histogram("gen", metrics.gen_loss, "loss", epoch)
        tf.contrib.summary.histogram("disc", metrics.disc_loss, "loss",
                                     epoch)

        tf.contrib.summary.histogram("on_real", metrics.disc_on_real,
                                     "predictions", epoch)
        tf.contrib.summary.histogram("on_gen", metrics.disc_on_generated,
                                     "predictions", epoch)

        # Regular checkpoint on the interval and on the last epoch; otherwise
        # the optional final checkpoint during the last five epochs
        if (epoch + 1) % checkpoint_interval == 0 or epoch == epochs - 1:
            self._checkpoint.save()
        elif epoch > epochs - 6 and self._final_checkpoint:
            self._final_checkpoint.save()

        if epoch == 0 or epoch == 4 or (epoch + 1) % 10 == 0:
            memory_usage = ""
            if epoch == 0 or epoch == 4 or (epoch + 1) % 50 == 0:
                try:
                    memory_usage = " - memory: " + get_memory_usage_string(
                    )
                except:  # pylint: disable=bare-except
                    memory_usage = " - Unable to get memory usage, no GPU available?"
            # Remaining minutes, extrapolated from the average epoch duration
            # since self._config.start_time
            time_remaining = (time.time() - self._config.start_time) / (
                epoch + 1) * (epochs - epoch - 1) / 60
            tf.logging.info(
                "{}/{}: Round time: {:.1f}m - ETA {:%H:%M} ({:.1f}h){}".
                format(epoch + 1, epochs, (time.time() - start) / 60,
                       datetime.now() + timedelta(minutes=time_remaining),
                       time_remaining / 60, memory_usage))
            # pylint: disable=line-too-long
            tf.logging.info(
                "{}/{}: Loss G {:.2f}+-{:.3f}, D {:.2f}+-{:.3f}; D on real {:.3f}+-{:.3f}, on fake {:.3f}+-{:.3f}"
                .format(epoch + 1, epochs, np.mean(metrics.gen_loss),
                        np.std(metrics.gen_loss),
                        np.mean(metrics.disc_loss),
                        np.std(metrics.disc_loss),
                        np.mean(metrics.disc_on_real),
                        np.std(metrics.disc_on_real),
                        np.mean(metrics.disc_on_generated),
                        np.std(metrics.disc_on_generated)))

        # True when epoch, epoch+1 or epoch+2 is a multiple of scores_interval
        is_near_interval = \
            (epoch+0) % scores_interval == 0 or \
            (epoch+1) % scores_interval == 0 or \
            (epoch+2) % scores_interval == 0
        if self._perceptual_scores and (
                epoch > 1 or self._config.scores_every_epoch) and (
                    epoch > epochs - 6 or is_near_interval):
            fid, mmd, clustering_high, clustering_low, low_level_fids, combined_fid = self._perceptual_scores.compute_scores_from_generator(
                self._generator, self.data_set.map(lambda x, y: x))
            tf.logging.warning(
                "{}/{}: Computed perceptual scores: FID={:.1f}, MMD={:.3f}, clustering-high={:.3f}, clustering-low={:.3f}"
                .format(epoch + 1, epochs, fid, mmd, clustering_high,
                        clustering_low))
            metrics.add_perceptual_scores(fid, mmd, clustering_high,
                                          clustering_low, low_level_fids,
                                          combined_fid)
            tf.contrib.summary.scalar("fid", fid, "perceptual", epoch)
            tf.contrib.summary.scalar("mmd", mmd, "perceptual", epoch)
            tf.contrib.summary.scalar("clustering_high", clustering_high,
                                      "perceptual", epoch)
            tf.contrib.summary.scalar("clustering_low", clustering_low,
                                      "perceptual", epoch)
            # not adding low-level FIDs to TB since I'm not using it anyway
            tf.contrib.summary.scalar("combined_fid", combined_fid,
                                      "perceptual", epoch)

        # Compare discriminator scores on training vs. test data
        if self.test_data_set and (
                epoch > 1 or self._config.scores_every_epoch) and (
                    epoch > epochs - 6 or is_near_interval):
            disc_on_training = self._discriminate_data_set(
                self._discriminator, self.data_set)
            disc_on_training_mean = np.mean(disc_on_training)
            disc_on_training_std = np.std(disc_on_training)
            disc_on_test = self._discriminate_data_set(
                self._discriminator, self.test_data_set)
            disc_on_test_mean = np.mean(disc_on_test)
            disc_on_test_std = np.std(disc_on_test)
            tf.logging.warning(
                "{}/{}: Disc on training: {:.3f}+-{:.3f}, on test: {:.3f}+-{:.3f}, diff: {:.3f}"
                .format(epoch + 1, epochs, disc_on_training_mean,
                        disc_on_training_std, disc_on_test_mean,
                        disc_on_test_std,
                        disc_on_training_mean - disc_on_test_mean))
            metrics.add_disc_on_training_test(disc_on_training_mean,
                                              disc_on_training_std,
                                              disc_on_test_mean,
                                              disc_on_test_std)
            tf.contrib.summary.scalar("disc_on_training_mean",
                                      disc_on_training_mean,
                                      "disc_overfitting", epoch)
            tf.contrib.summary.scalar("disc_on_training_std",
                                      disc_on_training_std,
                                      "disc_overfitting", epoch)
            tf.contrib.summary.scalar("disc_on_test_mean",
                                      disc_on_test_mean, "disc_overfitting",
                                      epoch)
            tf.contrib.summary.scalar("disc_on_test_std", disc_on_test_std,
                                      "disc_overfitting", epoch)

        metrics_writer.writerow(metrics.get_row_data())
------- iHessian: array of shape [n_features, n_features]. Inverse of hessian matrix at theta. """ hessian = np.zeros((self.n_features, self.n_features)) for xi, yi in zip(X, y): lt = logistic(np.dot(theta, xi)) hessian += lt * (1.0 - lt) * np.outer(xi, xi) return np.linalg.inv(hessian) if __name__ == "__main__": n_samples = 100 x = np.linspace(-2.0, 2.0, n_samples) y = np.where(logistic(0.8 + 1.2 * x) >= 0.5, 1.0, 0.0) X = np.atleast_2d(x).T # Add some noise (to prevent parameter explosion) np.random.seed(0) to_flip = np.random.random_integers(0, n_samples - 1, 12) y[to_flip] = 1.0 - y[to_flip] # theta should be close to [0.8, 1.2] reg1 = LogisticRegression() reg1.fit(X, y) print reg1.theta, reg1.theta * 0.8 / reg1.theta[0] reg2 = LogisticRegression(method = "batch_gd", alpha = 0.01) reg2.fit(X, y) print reg2.theta, reg2.theta * 0.8 / reg2.theta[0]
def __gradient(self, x:np.ndarray, t:np.ndarray):
    """Weighted gradient of the loss with respect to ``self.w``.

    Parameters
    ----------
    x : array with one sample per row.
    t : array of targets, one per sample.

    Returns
    -------
    Array with one entry per column of ``x``: the negated mean over samples
    of ``x * (t - logistic(x @ w)) * data_w``.
    """
    # Column vectors so they broadcast across the feature columns of x
    data_w = self.data_w.reshape([-1, 1])
    rss = t - logistic(np.matmul(x, self.w))
    # Fix: the closing parenthesis of reshape used to enclose `* data_w`,
    # which passed a float array as the new shape and raised.
    return -np.mean(x * rss.reshape([-1, 1]) * data_w, axis=0)
def train(model, generator, discriminator, generator_optimizer, discriminator_optimizer, checkpoint, metrics_writer, args):
    """Train generator and discriminator on freshly drawn samples.

    For each of ``args.epochs`` epochs, ``args.batches`` batches of inputs
    (``draw_inputs``) and real samples (``draw_real_samples``) are drawn and
    both networks receive one optimizer step per batch. After every epoch
    results are stored twice via ``store_epoch_results``, a summary is logged
    and a row (epoch, duration, mean/std of each metric) is written to
    ``metrics_writer``. A checkpoint is saved every 10th epoch.
    """
    for epoch in range(args.epochs):
        start = time.time()

        # Per-epoch accumulators
        losses = []
        discriminations = [list(), list()]  # [0]: on real, [1]: on generated
        all_targets = []
        all_generated = []
        for _ in range(args.batches):
            inputs = draw_inputs(args.batch_size, args.noise_dimensions)
            # if (epoch+1) % 10 == 0:
            #   inputs += 2
            targets = tf.convert_to_tensor(draw_real_samples(args.batch_size))
            # additional_targets = tf.convert_to_tensor(draw_real_samples(args.batch_size*2))
            all_targets.extend(targets)

            with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
                generated = generator(inputs, training=True)
                all_generated.extend(generated)

                disc_on_real = discriminator(targets, training=True)
                # disc_on_additional_real = discriminator(additional_targets, training=True)
                disc_on_generated = discriminator(generated, training=True)

                gen_losses = model.gen_loss(disc_on_generated, None)
                # The generator loss is the sum of all its named components
                gen_loss = sum(gen_losses.values())
                # disc_on_combined_real = tf.concat([disc_on_real, disc_on_additional_real], axis=0)
                # disc_loss = model.disc_loss(disc_on_combined_real, disc_on_generated)
                disc_loss = model.disc_loss(disc_on_real, disc_on_generated)

            losses.append([gen_loss, disc_loss])
            discriminations[0].extend(logistic(disc_on_real))
            discriminations[1].extend(logistic(disc_on_generated))

            gradients_of_generator = gen_tape.gradient(gen_loss, generator.variables)
            gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.variables)

            generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.variables))
            discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.variables))

        if (epoch+1) % 10 == 0:
            checkpoint.save()

        # Remaining minutes, extrapolated from the average epoch duration
        # since args.start_time
        time_remaining = (time.time()-args.start_time)/(epoch+1)*(args.epochs-epoch-1)/60
        # get both types of metrics into numpy arrays with the metrics in the first dimension
        losses = np.array(losses).T
        discriminations = np.array(discriminations)

        # NOTE(review): `% 1 == 0` is always true, so results are stored
        # every epoch; presumably a leftover interval knob
        if (epoch+1) % 1 == 0:
            store_epoch_results(tf.convert_to_tensor(all_targets), tf.convert_to_tensor(all_generated), generator, discriminator, args, epoch+1,
                discriminations[0].mean() - discriminations[1].mean())
            store_epoch_results(tf.convert_to_tensor(all_targets), tf.convert_to_tensor(all_generated), generator, discriminator, args, epoch+1,
                discriminations[0].mean() - discriminations[1].mean(), 2)

        tf.logging.info("{}/{}: Loss G {:.2f}, D {:.2f}; D on real {:.3f}, on fake {:.3f} ({:.3f}); {:.1f}s; ETA {:%H:%M} ({:.1f}h)".format(
            epoch + 1, args.epochs,
            losses[0].mean(), losses[1].mean(),
            discriminations[0].mean(), discriminations[1].mean(), discriminations[0].mean() - discriminations[1].mean(),
            time.time()-start, datetime.now() + timedelta(minutes=time_remaining), time_remaining/60))
        # Flatten (mean, std) of each loss and each discrimination series
        # into one list for the metrics row
        process_values = lambda values: (np.mean(values), np.std(values))
        aggregated_metrics = [item for sublist in
            [process_values(values) for values in losses] +
            [process_values(values) for values in discriminations]
            for item in sublist]
        metrics_writer.writerow([epoch+1, time.time()-start, *aggregated_metrics])
def update(self, data):
    """
    Update of model parameters.

    Computes pairwise and single-unit probabilities from the free messages
    (``self.message_free``) and from the clamped messages
    (``self.message_clamp``), moves ``self.adj`` and ``self.local`` by
    ``self.lr`` times the difference (conditional minus marginal), capped
    from above at ``self.th``, prints diagnostics and returns the batch
    Bethe value.

    Parameters
    ----------
    data : array-like, shape (n_data ** 2, n_batch)
        The data for current epoch, needed for updating gradient.

    Returns
    -------
    bethe : (entropy_clamp - entropy) + energy, as computed below.
    """
    adj = self.adj
    local = self.local
    row = self.row
    col = self.col
    r2c = self.r2c
    # Number of leading units that receive a data signal
    clamp = data.shape[0]
    batch_size = data.shape[1]
    message_free = self.message_free
    message_clamp = self.message_clamp
    n = self.n

    #Old without external
    msg_free = self.message_free
    # Per edge: sum of the messages sharing its row index, minus its own
    sum_free = (torch.zeros(n, device=self.device).index_add_(0, row, msg_free)[row] - msg_free)
    p_ij_marg = 1 / (torch.exp(-(sum_free + sum_free[r2c] + local[row] + local[col] + adj)) \
        + torch.exp(-(sum_free + local[row] + adj)) + torch.exp(-(sum_free[r2c] + local[col] + adj)) + 1)

    #Calculate conditional joint probability
    msg_clamp = self.message_clamp
    # Broadcast the parameters across the batch dimension
    adj = adj.unsqueeze(1).expand(-1, batch_size)
    local = local.unsqueeze(1).expand(-1, batch_size)
    # Add unit that sends signal of +-100 based on data
    msg_sum = torch.zeros((n, batch_size), device=self.device).index_add_(0, row, msg_clamp)
    msg_sum[:clamp] += (data * 2 - 1) * 100
    sum_clamp = msg_sum[row] - msg_clamp
    p_ij_cond = 1 / (torch.exp(-(sum_clamp + sum_clamp[r2c] + local[row] + local[col] + adj)) \
        + torch.exp(-(sum_clamp + local[row] + adj)) + torch.exp(-(sum_clamp[r2c] + local[col] + adj)) + 1)

    #Calculate marginal and condtional probabilities
    # NOTE(review): message_clamp is the same tensor as self.message_clamp, so
    # this += changes the stored messages in place. It also indexes the first
    # `clamp` entries of a tensor that is scattered by `row` below, whereas
    # msg_sum above is indexed per unit -- confirm this is intended
    message_clamp[:clamp] += (data * 2 - 1) * 100
    p_i_marg = logistic(torch.zeros(n, device=self.device).index_add_(0, row, message_free) + self.local)
    p_i_cond = logistic(torch.zeros((n, batch_size), device=self.device).index_add_(0, row, message_clamp) + local)

    # Average over batchs
    p_ij_cond = torch.sum(p_ij_cond, 1) / batch_size
    p_i_cond = torch.sum(p_i_cond, 1) / batch_size

    # Entropy terms of the clamped phase; entries whose log is infinite and
    # edges touching a clamped unit are excluded
    mask = (row >= clamp) * (col >= clamp) * (1 - torch.isinf(torch.log(p_ij_cond)))
    neg_clamp = torch.sum(torch.log(p_ij_cond)[mask] * p_ij_cond[mask]) / 2
    # Per unit: number of edges whose col index is not clamped, minus one
    const_clamp = torch.zeros(n, device=self.device).index_add_(0, row[col >= clamp], torch.ones(row[col >= clamp].shape[0], device=self.device)) - 1
    mask = (p_i_cond != 0)
    pos_clamp = torch.sum(const_clamp[mask] * (p_i_cond[mask] * torch.log(p_i_cond[mask])))
    entropy_clamp = pos_clamp - neg_clamp

    energy = torch.sum((p_ij_cond - p_ij_marg) * self.adj) / 2 + torch.sum((p_i_cond - p_i_marg) * self.local)

    # Entropy terms of the free phase
    neg = torch.sum(torch.log(p_ij_marg) * p_ij_marg) / 2
    # Per unit: number of edges with that row index, minus one
    const = torch.zeros(n, device=self.device).index_add_(0, row, torch.ones(row.shape[0], device=self.device)) - 1
    pos = torch.sum(const * (p_i_marg * torch.log(p_i_marg)))
    entropy = pos - neg

    bethe = (entropy_clamp - entropy) + energy

    delta_loc = torch.norm(p_i_cond - p_i_marg)
    delta_adj = torch.norm(p_ij_cond - p_ij_marg)

    #Update model parameters
    th = self.th
    # Only an upper cap is applied (max=th); there is no lower bound
    self.adj = torch.clamp(self.adj + self.lr * (p_ij_cond - p_ij_marg), max=th)
    self.local = torch.clamp(self.local + self.lr * (p_i_cond - p_i_marg), max=th)

    print(" avg weight: %.3g" % (torch.sum(self.adj) / self.adj.shape[0]))
    print(" max weight: %.3g" % (torch.max(self.adj)))
    print(" min weight: %.3g" % (torch.min(self.adj)))
    print(" avg bias: %.3g" % (torch.sum(self.local) / self.local.shape[0]))
    print(" max bias: %.3g" % (torch.max(self.local)))
    print(" min bias: %.3g" % (torch.min(self.local)))
    print(" delta local: %.3g" % delta_loc)
    print(" delta adj: %.3g" % delta_adj)
    print(" batch bethe: %.3g" % bethe)

    return bethe
from logisticbandit import LogisticBandit, is_pos_semidef from utils import logistic from simulate import add_noise, simulate_constant, simulate_noise, simulate ## simulate_noise test BASE_P = .3 num_arm = 10 N = 10000 n_rep = 100 p_list = {"arm_" + str(i): BASE_P for i in range(num_arm)} p_list["arm_0"] = BASE_P + .01 delta = logistic(p_list["arm_0"]) - logistic(p_list["arm_1"]) noise_seq = np.repeat( delta * np.array([10., 20, 30, 40, 50, 60, 70, 80, 90, 100]), n_rep) TIMESTEP = 50 regret_or = [] regret_fu = [] regret_be = [] for NOISE in tqdm(noise_seq): p_noise_seq = [add_noise(p_list, noise=NOISE) for _ in range(TIMESTEP)] try: k = np.random.randint(1, 1000)
def main(start_time):
    """Render figures that interpolate between pairs of input images.

    For each of ``args.image_count`` figures, ``args.interpolation_pairs``
    pairs of randomly drawn input images are blended with ``interp_shape`` in
    ``args.interpolation_steps`` steps. Every blended input is passed through
    the generator. The input is drawn in one subplot row and the generator
    output in the row below it, titled with the logistic of the discriminator
    output. Each figure is saved as a PNG below ``output/<args.eval_dir>``.

    Args:
        start_time: value stored on the parsed arguments as ``args.start_time``.
    """
    tf.enable_eager_execution()

    # handle arguments and config
    args = parse_arguments()
    args.start_time = start_time
    tf.logging.info("Args: {}".format(args))
    assert args.input_type == "patho"
    assert args.target_type == "image"
    args.has_colored_input = args.input_type == "image"
    args.has_colored_target = args.target_type == "image"
    args.discriminator_classes = 1
    args.checkpoint_dir = os.path.join("output", args.eval_dir, "checkpoints")

    model = load_model(args)
    generator = model.get_generator()
    discriminator = model.get_discriminator()
    load_checkpoint(args,
                    checkpoint_number=args.epoch // 25,
                    generator=generator,
                    discriminator=discriminator)

    input_image_names = np.array(load_image_names(args.data_dir))
    # target_image_names = input_image_names if not args.target_data_dir else \
    #     np.array(load_image_names(args.target_data_dir))
    # Two distinct images per interpolation pair, for every figure.
    sample_indexes = np.random.choice(
        len(input_image_names),
        args.interpolation_pairs * 2 * args.image_count,
        replace=False)
    input_images = load_images(input_image_names[sample_indexes],
                               args.data_dir, args.input_type)
    # target_images = load_images(target_image_names[sample_indexes], args.target_data_dir or args.data_dir, args.target_type)

    # With a single step there is nothing to interpolate towards; use a
    # divisor of 1 so the only step sits at fraction 0 instead of raising
    # ZeroDivisionError.
    last_step = max(args.interpolation_steps - 1, 1)
    subplot_rows = args.interpolation_pairs * 2

    for image_number in range(args.image_count):
        tf.logging.info("Generating image {}/{}".format(
            image_number + 1, args.image_count))
        plt.figure(figsize=(int(args.width / 100), int(args.height / 100)))
        with tqdm(total=args.interpolation_pairs *
                  args.interpolation_steps) as pbar:
            for i in range(args.interpolation_pairs):
                # index of the first image of this pair within input_images
                index = 2 * i + 2 * image_number * args.interpolation_pairs
                for j in range(args.interpolation_steps):
                    pbar.update(1)
                    input_sample = interp_shape(
                        input_images[index, :, :, 0],
                        input_images[index + 1, :, :, 0],
                        float(j) / last_step)
                    # add batch and channel dimensions
                    gen_input = tf.expand_dims(
                        tf.expand_dims(input_sample, 0), -1)
                    predictions = generator(gen_input, training=False)
                    disc_input = tf.concat(
                        [gen_input, predictions], axis=-1
                    ) if args.conditioned_discriminator else predictions
                    classifications = discriminator(disc_input,
                                                    training=False)

                    # upper row of the pair: the interpolated input
                    plt.subplot(subplot_rows, args.interpolation_steps,
                                2 * i * args.interpolation_steps + j + 1)
                    plt.axis("off")
                    if args.has_colored_input:
                        plt.imshow(
                            np.array((input_sample + 1) * 127.5,
                                     dtype=np.uint8))
                    else:
                        plt.imshow(np.array((input_sample + 1) * 127.5,
                                            dtype=np.uint8),
                                   cmap="gray",
                                   vmin=0,
                                   vmax=255)

                    # lower row of the pair: the generator output
                    plt.subplot(
                        subplot_rows, args.interpolation_steps,
                        2 * i * args.interpolation_steps + j + 1 +
                        args.interpolation_steps)
                    plt.axis("off")
                    plt.title(np.round(
                        logistic(classifications[0]).numpy(), 5),
                              fontsize=20)
                    if args.has_colored_target:
                        plt.imshow(
                            np.array((predictions[0] + 1) * 127.5,
                                     dtype=np.uint8))
                    else:
                        plt.imshow(np.array(
                            (predictions[0, :, :, 0] + 1) * 127.5,
                            dtype=np.uint8),
                                   cmap="gray",
                                   vmin=0,
                                   vmax=255)

        plt.tight_layout()
        figure_file = os.path.join(
            "output", args.eval_dir,
            "noise_interpolation{}_{:03d}.png".format(
                "_{}".format(args.description) if args.description else "",
                image_number + 1))
        plt.savefig(figure_file)
        plt.close()

    tf.logging.info("Finished generating {} images".format(args.image_count))
def generate_samples(args, generator, discriminator,
                     training_input_image_names,
                     training_second_input_image_names,
                     test_input_image_names, test_second_input_image_names,
                     image_number):
    """Plot the generator's output for a set of prepared inputs and save it.

    The inputs returned by ``build_inputs`` are concatenated, batched and run
    through the generator. For every input four panels are drawn: all but the
    last input channel (with a label title), the last input channel, the
    generated sample (titled with the logistic of the discriminator output),
    and the difference between the generated sample and the first panel. The
    figure is written to ``args.output_dir``.
    """
    group_labels = [
        "original", "lrflip", "udflip", "test", "blank", "blank-lrflip",
        "blank-udflip", "blank-test"
    ]

    first_inputs, second_inputs = build_inputs(
        args, training_input_image_names, training_second_input_image_names,
        test_input_image_names, test_second_input_image_names, image_number)
    first_inputs = np.concatenate(first_inputs, axis=0)
    second_inputs = np.concatenate(second_inputs, axis=0)
    stacked_inputs = np.concatenate([first_inputs, second_inputs], axis=-1)
    sample_count = len(first_inputs)
    batches = tf.data.Dataset.from_tensor_slices(stacked_inputs).batch(
        args.batch_size)
    # drop the local numpy references once the data set has been built
    del first_inputs, second_inputs, stacked_inputs

    figure_size = (int(args.width / 100), int(args.height / 100))
    plt.figure(figsize=figure_size)
    plt.suptitle("{}: different G inputs".format(args.eval_dir), fontsize=28)
    plt.tight_layout()

    groups_per_row = 2
    panels_per_group = 4
    grid_rows = math.ceil(sample_count / groups_per_row)
    grid_columns = panels_per_group * groups_per_row

    next_panel = 1  # matplotlib subplot indexes are 1-based
    for batch in tqdm(batches, total=sample_count // args.batch_size):
        generated = generator(batch, training=True)
        disc_input = (tf.concat([batch, generated], axis=-1)
                      if args.conditioned_discriminator else generated)
        scores = logistic(discriminator(disc_input, training=True))

        for i in range(batch.shape[0]):
            # panel 1: every input channel except the last one
            plt.subplot(grid_rows, grid_columns, next_panel)
            group_number = next_panel // panels_per_group
            names_per_label = len(training_input_image_names)
            panel_title = "{} {}".format(
                group_labels[group_number // names_per_label],
                group_number % names_per_label + 1)
            Evaluation.plot_image(batch[i][:, :, :-1],
                                  title=panel_title,
                                  title_size=20)

            # panel 2: the last input channel
            plt.subplot(grid_rows, grid_columns, next_panel + 1)
            Evaluation.plot_image(batch[i][:, :, -1:])

            # panel 3: the generated sample with its discriminator score
            plt.subplot(grid_rows, grid_columns, next_panel + 2)
            Evaluation.plot_image(generated[i],
                                  title=scores[i].numpy(),
                                  title_size=20)

            # panel 4: difference between generated sample and input
            plt.subplot(grid_rows, grid_columns, next_panel + 3)
            Evaluation.plot_image(generated[i] - batch[i][:, :, :-1])

            next_panel += 4

    description_suffix = ("_{}".format(args.description)
                          if args.description else "")
    figure_file = os.path.join(
        args.output_dir,
        "generator_on_inputs{}_{:03d}.png".format(description_suffix,
                                                  image_number + 1))
    plt.savefig(figure_file)
    plt.close()
def main(start_time):
    """Search the generator's input space for samples the discriminator rates higher.

    For each of ``args.image_count`` figures, ``args.search_samples`` random
    inputs are drawn and the one with the highest logistic discriminator
    output is plotted first. Then, for every remaining subplot cell, a set of
    random directions is added to the current best input; the best resulting
    candidate is plotted and kept only if its prediction exceeds the previous
    best one. Each figure is saved as a PNG below ``output/<args.eval_dir>``.

    Args:
        start_time: value stored on the parsed arguments as ``args.start_time``.
    """
    tf.enable_eager_execution()

    # handle arguments and config
    args = parse_arguments()
    args.start_time = start_time
    tf.logging.info("Args: {}".format(args))
    args.has_colored_target = args.colored
    args.checkpoint_dir = os.path.join("output", args.eval_dir, "checkpoints")

    model = load_model(args)
    generator = model.get_generator()
    discriminator = model.get_discriminator()
    load_checkpoint(args, checkpoint_number=args.epoch // 25, generator=generator, discriminator=discriminator)

    # `training` flags passed to the models below (generator: True, discriminator: False)
    gen_training = not False
    disc_training = False

    for image_number in range(args.image_count):
        tf.logging.info("Generating image {}/{}".format(
            image_number + 1, args.image_count))
        plt.figure(figsize=(32, 32))

        # starting point: the best of a set of random inputs
        inputs = tf.random_normal([args.search_samples, args.noise_dimensions])
        samples = generator(inputs, training=gen_training)
        predictions = logistic(discriminator(samples, training=disc_training))
        best_index = tf.argmax(predictions)
        best_index = best_index.numpy() if best_index.shape else best_index
        previous_prediction = predictions[best_index]
        plt.subplot(args.rows, args.columns, 1)
        Evaluation.plot_image(samples[best_index], np.round(predictions[best_index].numpy(), 5))

        previous_direction = None
        improvements = 0
        best_input = inputs[best_index]
        if args.step_size is not None:
            current_step_size = args.step_size

        for i in range(1, args.rows * args.columns):
            tf.logging.info(
                "Looking for image {}/{}, previous prediction: {}{}".format(
                    i + 1, args.rows * args.columns, previous_prediction,
                    "" if args.step_size is None else
                    ", step: {:.3f}".format(current_step_size)))

            # get new possible directions to move
            directions = tf.random_normal(
                [args.search_samples, args.noise_dimensions], stddev=0.1)
            if previous_direction is not None:
                # the last successful direction replaces the first random one (index 0)
                directions = tf.concat(
                    [[previous_direction], directions[1:, :]], axis=0)

            # obtain new inputs by moving previous input into the various directions
            lengths = [tf.norm(direction).numpy() for direction in directions]
            tf.logging.debug("Direction lengths: {}".format(",".join(
                [str(l) for l in lengths])))
            # repeat the best input once per candidate direction
            inputs = tf.reshape(tf.tile(best_input, [args.search_samples]), (-1, args.noise_dimensions))
            if args.step_size is None:
                inputs = inputs + directions
            else:
                # rescale every direction to length current_step_size;
                # note that `directions` is a Python list of tensors from here on
                directions = [
                    direction * current_step_size / tf.norm(direction)
                    for direction in directions
                ]
                inputs = inputs + directions

            # get new samples and predictions
            samples = generator(inputs, training=gen_training)
            predictions = logistic(
                discriminator(samples, training=disc_training))
            best_index = tf.argmax(predictions)
            best_index = best_index.numpy() if best_index.shape else best_index
            tf.logging.debug(
                "Best previous input: {}, input at best position: {}, direction: {}"
                .format(best_input[0], inputs[best_index, 0], directions[best_index][0]))
            if previous_direction is not None and best_index == 0:
                tf.logging.info("Going into the same direction again!")

            # accept the move only if it beats the best prediction so far
            if predictions[best_index].numpy() > previous_prediction.numpy():
                previous_prediction = predictions[best_index]
                previous_direction = directions[best_index]
                best_input = inputs[best_index]
                plt.subplot(args.rows, args.columns, i + 1)
                Evaluation.plot_image(
                    samples[best_index],
                    np.round(predictions[best_index].numpy(), 5))
                improvements += 1
            else:
                previous_direction = None
                tf.logging.info("No improvement found")
                # NOTE(review): the step size is scaled only after a failed step here - confirm this nesting matches the intended behavior
                if args.step_size is not None:
                    current_step_size *= args.size_factor

        # NOTE(review): divides by rows * columns - 1, which is zero for a 1x1 grid - confirm that configuration is never used
        tf.logging.info(
            "Improved the original image {} times ({:.1f}%)".format(
                improvements,
                100. * improvements / (args.rows * args.columns - 1)))

        plt.tight_layout()
        figure_file = os.path.join(
            "output", args.eval_dir, "samples{}_{:03d}.png".format(
                "_{}".format(args.description) if args.description else "",
                image_number + 1))
        plt.savefig(figure_file)
        plt.close()

    tf.logging.info("Finished generating {} images".format(args.image_count))
def main(start_time):
    """Generate grids of random generator samples and save them as PNG files.

    For each of ``args.image_count`` figures a batch of
    ``args.rows * args.columns`` random inputs (optionally truncated) is run
    through the generator and plotted on an ``args.rows`` x ``args.columns``
    grid. A second, "large" figure shows the first samples on a grid with half
    as many rows and columns. When ``args.titles`` is set, every panel is
    titled with the logistic of the discriminator output. Figures are written
    to ``output/<args.eval_dir>``, or ``old-output/<args.eval_dir>`` when the
    former does not exist.

    Args:
        start_time: value stored on the parsed arguments as ``args.start_time``.
    """
    tf.enable_eager_execution()

    # handle arguments and config
    args = parse_arguments()
    args.start_time = start_time
    tf.logging.info("Args: {}".format(args))
    args.has_colored_target = args.colored
    args.output_dir = os.path.join("output", args.eval_dir)
    if not os.path.exists(args.output_dir):
        args.output_dir = os.path.join("old-output", args.eval_dir)
    args.checkpoint_dir = os.path.join(args.output_dir, "checkpoints")

    model = load_model(args)
    generator = model.get_generator()
    discriminator = model.get_discriminator()
    load_checkpoint(args,
                    checkpoint_number=(args.epoch + 24) // 25,
                    generator=generator,
                    discriminator=discriminator)

    if args.truncation_threshold:
        tf.logging.warning("Truncating samples to {}".format(
            args.truncation_threshold))

    description_suffix = ("_{}".format(args.description)
                          if args.description else "")

    # Grid of the large figure. The panel count must be the product of the
    # two halved dimensions: `rows // 2 * columns // 2` evaluates left to
    # right as `((rows // 2) * columns) // 2`, which exceeds the grid for an
    # odd number of columns and makes plt.subplot fail.
    large_rows = args.rows // 2
    large_columns = args.columns // 2

    for image_number in trange(args.image_count):
        plt.figure(figsize=(32, 32))
        batch = tf.random_normal(
            [args.rows * args.columns, args.noise_dimensions])
        if args.truncation_threshold:
            batch = truncate_input(batch, args.truncation_threshold)
        samples = generator(batch, training=True)
        # the discriminator is only needed for the panel titles
        if args.titles:
            predictions = logistic(discriminator(samples, training=True))

        for i in range(samples.shape[0]):
            plt.subplot(args.rows, args.columns, i + 1)
            Evaluation.plot_image(
                samples[i],
                np.round(predictions[i].numpy(), 5) if args.titles else None)
        plt.tight_layout()
        figure_file = os.path.join(
            args.output_dir,
            "samples{}_{:03d}.png".format(description_suffix,
                                          image_number + 1))
        plt.savefig(figure_file)
        plt.close()

        # also store some of the images in HD
        plt.figure(figsize=(32, 32))
        for i in range(large_rows * large_columns):
            plt.subplot(large_rows, large_columns, i + 1)
            Evaluation.plot_image(
                samples[i],
                np.round(predictions[i].numpy(), 5) if args.titles else None)
        plt.tight_layout()
        figure_file = os.path.join(
            args.output_dir,
            "samples{}_large_{:03d}.png".format(description_suffix,
                                                image_number + 1))
        plt.savefig(figure_file)
        plt.close()

    tf.logging.info("Finished generating {} images".format(args.image_count))
def main(start_time):
    """Render grids of generator outputs along interpolated noise vectors.

    Three modes are supported (the first two are mutually exclusive):

    * ``args.single_interpolation``: one interpolation between two random
      noise vectors runs across the whole grid, row by row.
    * ``args.grid_interpolation``: four random corner vectors are
      interpolated along both grid axes.
    * otherwise: every row interpolates between its own two random vectors.

    Interpolation is spherical (``slerp``) when ``args.spherical`` is set and
    linear otherwise. The logistic of the discriminator output is collected
    for every generated image, and its running mean and standard deviation
    are logged after each figure. Figures are saved as PNG files in
    ``output/<args.eid>``, or ``old-output/<args.eid>`` when the former does
    not exist.

    Args:
        start_time: value stored on the parsed arguments as ``args.start_time``.
    """
    tf.enable_eager_execution()

    # handle arguments and config
    args = parse_arguments()
    args.start_time = start_time
    tf.logging.info("Args: {}".format(args))
    assert not (args.single_interpolation and args.grid_interpolation)
    args.has_colored_target = args.colored
    args.discriminator_classes = 1
    args.eval_dir = os.path.join("output", args.eid)
    if not os.path.exists(args.eval_dir):
        args.eval_dir = os.path.join("old-output", args.eid)
    args.checkpoint_dir = os.path.join(args.eval_dir, "checkpoints")

    model = load_model(args)
    generator = model.get_generator()
    discriminator = model.get_discriminator()
    load_checkpoint(args,
                    checkpoint_number=args.epoch // 25,
                    generator=generator,
                    discriminator=discriminator)

    def lerp(val, low, high):
        """Linearly interpolate between low (val=0) and high (val=1)."""
        return interp1d([0, 1], np.vstack([low, high]), axis=0)(val)

    def interpolate(val, low, high):
        """Interpolate spherically or linearly, depending on args.spherical."""
        return slerp(val, low, high) if args.spherical else lerp(
            val, low, high)

    def fraction(position, count):
        """Return position / (count - 1), or 0.0 when count is 1.

        A grid with a single row or column has no end point to interpolate
        towards; staying on the start sample avoids a ZeroDivisionError.
        """
        return position / (count - 1) if count > 1 else 0.0

    disc_predictions = []
    for image_number in range(args.image_count):
        tf.logging.info("Generating image {}/{}".format(
            image_number + 1, args.image_count))
        plt.figure(figsize=(int(args.width / 100), int(args.height / 100)))
        # plt.suptitle("{}: {} interpolations with {} steps ({}/{})".format(args.eval_dir,
        #   args.rows, args.columns, image_number+1, args.image_count), fontsize=16)

        if args.single_interpolation:
            start_end_samples = np.random.normal(
                size=(2, args.noise_dimensions))
        if args.grid_interpolation:
            corner_samples = np.random.normal(size=(4, args.noise_dimensions))

        with tqdm(total=args.rows * args.columns) as pbar:
            for i in range(args.rows):
                if args.grid_interpolation:
                    # left and right end points of this row
                    row_fraction = fraction(i, args.rows)
                    row_samples = \
                        (interpolate(row_fraction, corner_samples[0], corner_samples[2]),
                         interpolate(row_fraction, corner_samples[1], corner_samples[3]))
                if not args.single_interpolation and not args.grid_interpolation:
                    row_samples = np.random.normal(
                        size=(2, args.noise_dimensions))

                for j in range(args.columns):
                    pbar.update(1)
                    if args.single_interpolation:
                        sample = interpolate(
                            (i * args.columns + j) / args.rows / args.columns,
                            start_end_samples[0], start_end_samples[1])
                    else:
                        # same for "normal" and for grid interpolation
                        sample = interpolate(fraction(j, args.columns),
                                             *row_samples)

                    predictions = generator(tf.convert_to_tensor(
                        sample.reshape(1, args.noise_dimensions),
                        dtype=tf.float32),
                                            training=False)
                    classification = logistic(
                        discriminator(predictions,
                                      training=False).numpy())[0]
                    disc_predictions.append(classification)

                    # Evaluation.plot_image(predictions[i])
                    plt.subplot(args.rows, args.columns,
                                i * args.columns + j + 1)
                    plt.axis("off")
                    # plt.title(classification, fontsize=20)
                    if args.colored:
                        plt.imshow(
                            np.array((predictions[0] + 1) * 127.5,
                                     dtype=np.uint8))
                    else:
                        plt.imshow(np.array(
                            (predictions[0, :, :, 0] + 1) * 127.5,
                            dtype=np.uint8),
                                   cmap="gray",
                                   vmin=0,
                                   vmax=255)

        # statistics cover every image generated so far, across all figures
        tf.logging.info("Average disc prediction: {:.3f}+-{:.3f}".format(
            np.mean(disc_predictions), np.std(disc_predictions)))
        tf.logging.info("Saving image")
        plt.tight_layout()
        figure_file = os.path.join(
            args.eval_dir, "noise_interpolation{}_{:03d}.png".format(
                "_{}".format(args.description) if args.description else "",
                image_number + 1))
        plt.savefig(figure_file)
        plt.close()

    tf.logging.info("Finished generating {} images".format(args.image_count))
def _plot_epoch_samples(self, generator, discriminator):
    """Run both generators over the epoch samples and plot the results.

    ``generator`` and ``discriminator`` are (first, second) pairs. The first
    generator translates ``self.epoch_sample_input`` and the second generator
    reconstructs from that output; the same happens in the opposite direction
    for ``self._epoch_sample_targets``. Seven panels are drawn per sample.

    Returns:
        A tuple ``(images, predictions)`` where ``images`` is
        ``((first_samples, first_reconstructions),
        (second_samples, second_reconstructions))`` and ``predictions`` holds
        the matching logistic discriminator outputs in the same layout.
    """
    first_generator, second_generator = generator
    first_discriminator, second_discriminator = discriminator

    def as_disc_input(batch, images):
        # a conditioned discriminator also receives the generator's input
        if self._config.conditioned_discriminator:
            return tf.concat([batch, images], axis=-1)
        return images

    def run_cycle(sample_inputs, forward_generator, backward_generator,
                  forward_discriminator, backward_discriminator):
        # translate every batch, then reconstruct it from the translation
        translated, reconstructed = [], []
        translated_scores, reconstructed_scores = [], []
        batches = tf.data.Dataset.from_tensor_slices(sample_inputs).batch(
            self._config.batch_size)
        for batch in batches:
            segmentations = batch[:, :, :, -1:]

            new_samples = forward_generator(batch, training=True)
            translated.append(new_samples)
            translated_scores.append(
                logistic(
                    forward_discriminator(as_disc_input(batch, new_samples),
                                          training=True)))

            new_reconstructions = backward_generator(tf.concat(
                [new_samples, segmentations], axis=-1),
                                                     training=True)
            reconstructed.append(new_reconstructions)
            reconstructed_scores.append(
                logistic(
                    backward_discriminator(as_disc_input(
                        batch, new_reconstructions),
                                           training=True)))
        return translated, reconstructed, translated_scores, reconstructed_scores

    # the second discriminator judges the first generator's output and vice versa
    (first_samples, first_reconstructions, first_predictions,
     first_reconstruction_predictions) = run_cycle(
         self.epoch_sample_input, first_generator, second_generator,
         second_discriminator, first_discriminator)
    (second_samples, second_reconstructions, second_predictions,
     second_reconstruction_predictions) = run_cycle(
         self._epoch_sample_targets, second_generator, first_generator,
         first_discriminator, second_discriminator)

    first_samples = tf.concat(first_samples, axis=0)
    first_reconstructions = tf.concat(first_reconstructions, axis=0)
    first_predictions = tf.concat(first_predictions, axis=0)
    first_reconstruction_predictions = tf.concat(
        first_reconstruction_predictions, axis=0)
    second_samples = tf.concat(second_samples, axis=0)
    second_reconstructions = tf.concat(second_reconstructions, axis=0)
    second_predictions = tf.concat(second_predictions, axis=0)
    second_reconstruction_predictions = tf.concat(
        second_reconstruction_predictions, axis=0)

    images_per_row = 7
    grid_rows = self._epoch_images_shape[0]
    grid_columns = self._epoch_images_shape[1] * images_per_row
    for i in range(self._epoch_sample_input.shape[0]):
        first_panel = i * images_per_row + 1

        # NOTE: this assumes colored+B/W -> colored
        # first column: segmentation
        plt.subplot(grid_rows, grid_columns, first_panel)
        Evaluation.plot_image(self._epoch_sample_input[i, :, :, -1:])

        # first gen input
        plt.subplot(grid_rows, grid_columns, first_panel + 1)
        Evaluation.plot_image(self._epoch_sample_input[i, :, :, :-1])

        # first gen output
        plt.subplot(grid_rows, grid_columns, first_panel + 2)
        Evaluation.plot_image(first_samples[i],
                              np.round(first_predictions[i].numpy(), 5))

        # second gen reconstruction
        plt.subplot(grid_rows, grid_columns, first_panel + 3)
        Evaluation.plot_image(
            first_reconstructions[i],
            np.round(first_reconstruction_predictions[i].numpy(), 5))

        # second gen input
        plt.subplot(grid_rows, grid_columns, first_panel + 4)
        Evaluation.plot_image(self._epoch_sample_targets[i, :, :, :-1])

        # second gen output
        plt.subplot(grid_rows, grid_columns, first_panel + 5)
        Evaluation.plot_image(second_samples[i],
                              np.round(second_predictions[i].numpy(), 5))

        # first gen reconstruction
        plt.subplot(grid_rows, grid_columns, first_panel + 6)
        Evaluation.plot_image(
            second_reconstructions[i],
            np.round(second_reconstruction_predictions[i].numpy(), 5))

    images = ((first_samples, first_reconstructions),
              (second_samples, second_reconstructions))
    scores = ((first_predictions, first_reconstruction_predictions),
              (second_predictions, second_reconstruction_predictions))
    return images, scores
def __rss(self, X, t):
    """Return the share of rows in X whose score disagrees in sign with t.

    The score of a row is ``logistic(X @ w) - threshold``; a row counts as a
    disagreement when the product of its target in ``t`` and its score is
    negative. The count is divided by ``len(X)``.
    """
    activations = logistic(np.matmul(X, self.w))
    margins = activations - self.threshold
    disagreements = t * margins < 0
    return sum(disagreements) / len(X)
def prediction(inputs: np.ndarray, labels: np.ndarray, query: np.ndarray,
               tau: float, lamb: float):
    """Classify ``query`` as 0 or 1.

    Parameters are fitted for this query with ``newton_raphson`` and the
    resulting ``logistic(query, theta)`` value is compared against 0.5.

    Returns:
        1 if the value is greater than 0.5, otherwise 0.
    """
    fitted_theta = newton_raphson(inputs, labels, query, tau, lamb)
    is_positive = logistic(query, fitted_theta) > 0.5
    return int(is_positive)
def train(self, epochs, metrics_writer):  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    """Train both generator/discriminator pairs for ``epochs`` epochs.

    Per epoch this method optionally runs an extra discriminator-only pass,
    trains both generator/discriminator pairs on every batch of
    ``self.data_set``, writes gradient histograms (and periodically pickles
    the gradients of the first batch), optionally trains the second
    discriminator on ``self.extra_discriminator_data_set``, saves epoch
    samples, checkpoints, logs progress, periodically computes perceptual
    scores and discriminator outputs on training vs. test data, and finally
    writes one metrics row.

    Args:
        epochs: number of epochs to train.
        metrics_writer: object whose ``writerow`` is called once per epoch
            with ``metrics.get_row_data()``.
    """
    try:
        tf.logging.info("Memory usage before training: {}".format(
            get_memory_usage_string()))
    except:  # pylint: disable=bare-except
        tf.logging.warning("Unable to get memory usage, no GPU available?")

    assert not self._config.train_disc_on_previous_images \
        and not self._config.real_image_noise_stdev, "not implemented"

    checkpoint_interval = 25  # always have same interval for easier epoch number -> checkpoint-number conversion
    gradients_interval = epochs // 5 // 25 * 25  # aim for at least 5 gradients in total but have it a multiple of 25
    gradients_interval = 25 if gradients_interval == 0 else min(
        gradients_interval, 150)
    if self._config.scores_every_epoch:
        scores_interval = 1
    else:
        scores_interval = epochs // 10 // 10 * 10  # aim for at least 10 percentual scores in total but have it a multiple of 10
        scores_interval = 10 if scores_interval == 0 else min(
            scores_interval, 25)
    tf.logging.info(
        "Intervals: checkpoint {}, scores {}, gradients {}".format(
            checkpoint_interval, scores_interval, gradients_interval))

    for epoch in range(epochs):
        start = time.time()
        metrics = TwoWayMetrics(epoch + 1, 4)

        # NOTE, KEEP IN MIND: "first"/"second" refers to the input domain
        # ie "first generator" is the generator that receives input from the first domain (and generates for the second)

        # optional extra pass that only updates the discriminators;
        # the generator is passed only for extra_disc_step_both, otherwise None
        if self._config.extra_disc_step_real or self._config.extra_disc_step_both:
            for batch_number, batch in enumerate(self.data_set):
                batch_first_domain, batch_second_domain = batch
                forward_result = self.train_discriminator(
                    self._first_generator
                    if self._config.extra_disc_step_both else None,
                    self._second_discriminator, batch_first_domain,
                    batch_second_domain)
                backward_result = self.train_discriminator(
                    self._second_generator
                    if self._config.extra_disc_step_both else None,
                    self._first_discriminator, batch_second_domain,
                    batch_first_domain)
                self._second_discriminator_optimizer.apply_gradients(
                    zip(forward_result.disc_gradients,
                        self._second_discriminator.variables))
                self._first_discriminator_optimizer.apply_gradients(
                    zip(backward_result.disc_gradients,
                        self._first_discriminator.variables))

        # main pass: both directions are evaluated before any gradient is applied
        for batch_number, batch in enumerate(self.data_set):
            batch_first_domain, batch_second_domain = batch

            # evaluate models
            forward_result = self.train_generator_discriminator_pair(
                self._first_generator, self._second_discriminator,
                self._second_generator, batch_first_domain,
                batch_second_domain)
            backward_result = self.train_generator_discriminator_pair(
                self._second_generator, self._first_discriminator,
                self._first_generator, batch_second_domain,
                batch_first_domain)

            # store results
            # (the forward result belongs to the second discriminator, the backward result to the first)
            metrics.add_losses(
                (forward_result.gen_losses, backward_result.gen_losses),
                (backward_result.disc_loss, forward_result.disc_loss))
            metrics.add_discriminations(
                (logistic(backward_result.disc_on_real),
                 logistic(forward_result.disc_on_real)),
                (logistic(backward_result.disc_on_generated),
                 logistic(forward_result.disc_on_generated)))

            # train
            self._first_generator_optimizer.apply_gradients(
                zip(forward_result.gen_gradients,
                    self._first_generator.variables))
            self._second_discriminator_optimizer.apply_gradients(
                zip(forward_result.disc_gradients,
                    self._second_discriminator.variables))
            self._second_generator_optimizer.apply_gradients(
                zip(backward_result.gen_gradients,
                    self._second_generator.variables))
            self._first_discriminator_optimizer.apply_gradients(
                zip(backward_result.disc_gradients,
                    self._first_discriminator.variables))

            if batch_number == 0:
                # work with the gradients of the first (rather than last) batch since here, the batch is full for sure
                for i, variable in enumerate(
                        self._first_generator.variables):
                    if "batch_normalization" in variable.name or forward_result.gen_gradients[
                            i] is None:
                        continue
                    tf.contrib.summary.histogram(
                        variable.name.replace(":", "_"),
                        forward_result.gen_gradients[i],
                        "gradients/first_gen", epoch)
                for i, variable in enumerate(
                        self._first_discriminator.variables):
                    if "batch_normalization" in variable.name or backward_result.disc_gradients[
                            i] is None:
                        continue
                    tf.contrib.summary.histogram(
                        variable.name.replace(":", "_"),
                        backward_result.disc_gradients[i],
                        "gradients/first_disc", epoch)
                for i, variable in enumerate(
                        self._second_generator.variables):
                    if "batch_normalization" in variable.name or backward_result.gen_gradients[
                            i] is None:
                        continue
                    tf.contrib.summary.histogram(
                        variable.name.replace(":", "_"),
                        backward_result.gen_gradients[i],
                        "gradients/second_gen", epoch)
                for i, variable in enumerate(
                        self._second_discriminator.variables):
                    if "batch_normalization" in variable.name or forward_result.disc_gradients[
                            i] is None:
                        continue
                    tf.contrib.summary.histogram(
                        variable.name.replace(":", "_"),
                        forward_result.disc_gradients[i],
                        "gradients/second_disc", epoch)

                # periodically (and after the last epoch) pickle the raw gradients
                # NOTE(review): unlike the histogram loops above, None gradients are not skipped before .numpy() here - confirm they cannot occur
                # NOTE(review): reads gen_loss here while add_losses above reads gen_losses - confirm both attributes exist
                if (epoch + 1
                    ) % gradients_interval == 0 or epoch == epochs - 1:
                    first_generator_gradients = [(variable.name, forward_result.gen_gradients[i].numpy()) \
                        for i, variable in enumerate(self._first_generator.variables) if "batch_normalization" not in variable.name]
                    first_discriminator_gradients = [(variable.name, backward_result.disc_gradients[i].numpy()) \
                        for i, variable in enumerate(self._first_discriminator.variables) if "batch_normalization" not in variable.name]
                    second_generator_gradients = [(variable.name, backward_result.gen_gradients[i].numpy()) \
                        for i, variable in enumerate(self._second_generator.variables) if "batch_normalization" not in variable.name]
                    second_discriminator_gradients = [(variable.name, forward_result.disc_gradients[i].numpy()) \
                        for i, variable in enumerate(self._second_discriminator.variables) if "batch_normalization" not in variable.name]
                    with open(
                            os.path.join(
                                self._config.gradients_dir,
                                "gradients_at_epoch_{:04d}.pkl".format(
                                    epoch + 1)), "wb") as fh:
                        pickle.dump((first_generator_gradients,
                                     forward_result.gen_loss,
                                     second_generator_gradients,
                                     backward_result.gen_loss,
                                     first_discriminator_gradients,
                                     backward_result.disc_loss,
                                     second_discriminator_gradients,
                                     forward_result.disc_loss), fh)

        # optional extra real-only training data for the second discriminator
        if self.extra_discriminator_data_set:
            assert not self._config.conditioned_discriminator and \
                not self._config.train_disc_on_previous_images and \
                not self._config.real_image_noise_stdev, "not implemented"

            for disc_input in self.extra_discriminator_data_set:
                with tf.GradientTape() as disc_tape:
                    disc_on_real = self._second_discriminator(
                        disc_input, training=True)
                    disc_loss = self._model.disc_loss(
                        disc_on_real, [])  # no generated samples
                gradients_of_discriminator = disc_tape.gradient(
                    disc_loss, self._second_discriminator.variables)
                self._second_discriminator_optimizer.apply_gradients(
                    zip(gradients_of_discriminator,
                        self._second_discriminator.variables))

        # last argument is True on every fifth epoch
        _ = self.save_epoch_samples(
            (self._first_generator, self._second_generator),
            (self._first_discriminator, self._second_discriminator),
            epoch + 1, (epoch + 1) % 5 == 0)

        tf.contrib.summary.histogram("first_gen", metrics.first_gen_loss,
                                     "loss", epoch)
        tf.contrib.summary.histogram("first_disc", metrics.first_disc_loss,
                                     "loss", epoch)
        tf.contrib.summary.histogram("second_gen", metrics.second_gen_loss,
                                     "loss", epoch)
        tf.contrib.summary.histogram("second_disc",
                                     metrics.second_disc_loss, "loss", epoch)
        tf.contrib.summary.histogram("first_on_real",
                                     metrics.first_disc_on_real,
                                     "predictions", epoch)
        tf.contrib.summary.histogram("first_on_gen",
                                     metrics.first_disc_on_generated,
                                     "predictions", epoch)
        tf.contrib.summary.histogram("second_on_real",
                                     metrics.second_disc_on_real,
                                     "predictions", epoch)
        tf.contrib.summary.histogram("second_on_gen",
                                     metrics.second_disc_on_generated,
                                     "predictions", epoch)

        # regular checkpoint every checkpoint_interval epochs and after the last epoch;
        # otherwise the optional final checkpoint is used for the last few epochs
        if (epoch + 1) % checkpoint_interval == 0 or epoch == epochs - 1:
            self._checkpoint.save()
        elif epoch > epochs - 6 and self._final_checkpoint:
            self._final_checkpoint.save()

        # progress log for epochs 1 and 5 and every tenth epoch
        if epoch == 0 or epoch == 4 or (epoch + 1) % 10 == 0:
            memory_usage = ""
            if epoch == 0 or epoch == 4 or (epoch + 1) % 50 == 0:
                try:
                    memory_usage = " - memory: " + get_memory_usage_string(
                    )
                except:  # pylint: disable=bare-except
                    memory_usage = " - Unable to get memory usage, no GPU available?"
            # remaining minutes, extrapolated from the average epoch duration since start_time
            time_remaining = (time.time() - self._config.start_time) / (
                epoch + 1) * (epochs - epoch - 1) / 60
            tf.logging.info(
                "{}/{}: Round time: {:.1f}m - ETA {:%H:%M} ({:.1f}h){}".
                format(epoch + 1, epochs, (time.time() - start) / 60,
                       datetime.now() + timedelta(minutes=time_remaining),
                       time_remaining / 60, memory_usage))
            tf.logging.info(
                "{}/{} FWD: Loss G {:.2f}+-{:.3f}, D {:.2f}+-{:.3f}; D on real {:.3f}+-{:.3f}, on fake {:.3f}+-{:.3f}"
                .format(epoch + 1, epochs, np.mean(metrics.first_gen_loss),
                        np.std(metrics.first_gen_loss),
                        np.mean(metrics.second_disc_loss),
                        np.std(metrics.second_disc_loss),
                        np.mean(metrics.second_disc_on_real),
                        np.std(metrics.second_disc_on_real),
                        np.mean(metrics.second_disc_on_generated),
                        np.std(metrics.second_disc_on_generated)))
            tf.logging.info(
                "{}/{} BWD: Loss G {:.2f}+-{:.3f}, D {:.2f}+-{:.3f}; D on real {:.3f}+-{:.3f}, on fake {:.3f}+-{:.3f}"
                .format(epoch + 1, epochs, np.mean(metrics.second_gen_loss),
                        np.std(metrics.second_gen_loss),
                        np.mean(metrics.first_disc_loss),
                        np.std(metrics.first_disc_loss),
                        np.mean(metrics.first_disc_on_real),
                        np.std(metrics.first_disc_on_real),
                        np.mean(metrics.first_disc_on_generated),
                        np.std(metrics.first_disc_on_generated)))

        # true when epoch, epoch + 1 or epoch + 2 is a multiple of scores_interval
        is_near_interval = \
            (epoch+0) % scores_interval == 0 or \
            (epoch+1) % scores_interval == 0 or \
            (epoch+2) % scores_interval == 0
        if (epoch > 1 or self._config.scores_every_epoch) and (
                epoch > epochs - 6 or is_near_interval):
            # NaN placeholders stay in place for a direction without a score calculator
            first_fid = first_mmd = first_clustering_high = first_clustering_low = first_combined_fid = tf.convert_to_tensor(
                np.nan)
            first_low_level_fids = [tf.convert_to_tensor(np.nan)
                                    ] * metrics.n_low_level_fids
            second_fid = second_mmd = second_clustering_high = second_clustering_low = second_combined_fid = tf.convert_to_tensor(
                np.nan)
            second_low_level_fids = [tf.convert_to_tensor(np.nan)
                                     ] * metrics.n_low_level_fids
            if self._perceptual_scores:
                first_fid, first_mmd, first_clustering_high, first_clustering_low, first_low_level_fids, first_combined_fid = \
                    self._perceptual_scores.compute_scores_from_generator(self._first_generator, self.data_set.map(lambda x, y: x))
                tf.logging.warning(
                    "{}/{}: FWD: Computed perceptual scores: FID={:.1f}, MMD={:.3f}, clustering-high={:.3f}, clustering-low={:.3f}"
                    .format(epoch + 1, epochs, first_fid, first_mmd,
                            first_clustering_high, first_clustering_low))
            if self._reverse_perceptual_scores:
                second_fid, second_mmd, second_clustering_high, second_clustering_low, second_low_level_fids, second_combined_fid = \
                    self._reverse_perceptual_scores.compute_scores_from_generator(self._second_generator, self.data_set.map(lambda x, y: y))
                tf.logging.warning(
                    "{}/{}: BWD: Computed perceptual scores: FID={:.1f}, MMD={:.3f}, clustering-high={:.3f}, clustering-low={:.3f}"
                    .format(epoch + 1, epochs, second_fid, second_mmd,
                            second_clustering_high, second_clustering_low))
            metrics.add_perceptual_scores(
                (first_fid, second_fid), (first_mmd, second_mmd),
                (first_clustering_high, second_clustering_high),
                (first_clustering_low, second_clustering_low),
                (first_low_level_fids, second_low_level_fids),
                (first_combined_fid, second_combined_fid))
            tf.contrib.summary.scalar("first_fid", first_fid, "perceptual",
                                      epoch)
            tf.contrib.summary.scalar("first_mmd", first_mmd, "perceptual",
                                      epoch)
            tf.contrib.summary.scalar("first_clustering_high",
                                      first_clustering_high, "perceptual",
                                      epoch)
            tf.contrib.summary.scalar("first_clustering_low",
                                      first_clustering_low, "perceptual",
                                      epoch)
            # not adding low-level FIDs to TB since I'm not using it anyway
            tf.contrib.summary.scalar("first_combined_fid",
                                      first_combined_fid, "perceptual",
                                      epoch)
            tf.contrib.summary.scalar("second_fid", second_fid,
                                      "perceptual", epoch)
            tf.contrib.summary.scalar("second_mmd", second_mmd,
                                      "perceptual", epoch)
            tf.contrib.summary.scalar("second_clustering_high",
                                      second_clustering_high, "perceptual",
                                      epoch)
            tf.contrib.summary.scalar("second_clustering_low",
                                      second_clustering_low, "perceptual",
                                      epoch)
            # not adding low-level FIDs to TB since I'm not using it anyway
            tf.contrib.summary.scalar("second_combined_fid",
                                      second_combined_fid, "perceptual",
                                      epoch)

        # compare discriminator outputs on training and test data on the same schedule as the scores
        if self.test_data_set and (
                epoch > 1 or self._config.scores_every_epoch) and (
                    epoch > epochs - 6 or is_near_interval):
            first_disc_on_training = self._discriminate_data_set(
                self._first_discriminator,
                self.data_set.map(lambda x, y: (y, x[:, :, :, :3])))
            first_disc_on_training_mean = np.mean(first_disc_on_training)
            first_disc_on_training_std = np.std(first_disc_on_training)
            first_disc_on_test = self._discriminate_data_set(
                self._first_discriminator,
                self.test_data_set.map(lambda x, y: (y, x[:, :, :, :3])))
            first_disc_on_test_mean = np.mean(first_disc_on_test)
            first_disc_on_test_std = np.std(first_disc_on_test)
            second_disc_on_training = self._discriminate_data_set(
                self._second_discriminator,
                self.data_set.map(lambda x, y: (x, y[:, :, :, :3])))
            second_disc_on_training_mean = np.mean(second_disc_on_training)
            second_disc_on_training_std = np.std(second_disc_on_training)
            second_disc_on_test = self._discriminate_data_set(
                self._second_discriminator,
                self.test_data_set.map(lambda x, y: (x, y[:, :, :, :3])))
            second_disc_on_test_mean = np.mean(second_disc_on_test)
            second_disc_on_test_std = np.std(second_disc_on_test)
            tf.logging.warning(
                "{}/{}: First disc on training: {:.3f}+-{:.3f}, on test: {:.3f}+-{:.3f}, diff: {:.3f}"
                .format(
                    epoch + 1, epochs, first_disc_on_training_mean,
                    first_disc_on_training_std, first_disc_on_test_mean,
                    first_disc_on_test_std,
                    first_disc_on_training_mean - first_disc_on_test_mean))
            tf.logging.warning(
                "{}/{}: Second disc on training: {:.3f}+-{:.3f}, on test: {:.3f}+-{:.3f}, diff: {:.3f}"
                .format(
                    epoch + 1, epochs, second_disc_on_training_mean,
                    second_disc_on_training_std, second_disc_on_test_mean,
                    second_disc_on_test_std,
                    second_disc_on_training_mean - second_disc_on_test_mean))
            metrics.add_disc_on_training_test(
                (first_disc_on_training_mean, second_disc_on_training_mean),
                (first_disc_on_training_std, second_disc_on_training_std),
                (first_disc_on_test_mean, second_disc_on_test_mean),
                (first_disc_on_test_std, second_disc_on_test_std))
            tf.contrib.summary.scalar("first_disc_on_training_mean",
                                      first_disc_on_training_mean,
                                      "disc_overfitting", epoch)
            tf.contrib.summary.scalar("first_disc_on_training_std",
                                      first_disc_on_training_std,
                                      "disc_overfitting", epoch)
            tf.contrib.summary.scalar("first_disc_on_test_mean",
                                      first_disc_on_test_mean,
                                      "disc_overfitting", epoch)
            tf.contrib.summary.scalar("first_disc_on_test_std",
                                      first_disc_on_test_std,
                                      "disc_overfitting", epoch)
            tf.contrib.summary.scalar("second_disc_on_training_mean",
                                      second_disc_on_training_mean,
                                      "disc_overfitting", epoch)
            tf.contrib.summary.scalar("second_disc_on_training_std",
                                      second_disc_on_training_std,
                                      "disc_overfitting", epoch)
            tf.contrib.summary.scalar("second_disc_on_test_mean",
                                      second_disc_on_test_mean,
                                      "disc_overfitting", epoch)
            tf.contrib.summary.scalar("second_disc_on_test_std",
                                      second_disc_on_test_std,
                                      "disc_overfitting", epoch)

        metrics_writer.writerow(metrics.get_row_data())