def main(unused_argv):
    # Load training and eval data.
    train_file = "data/train.csv"
    val_file = "data/val.csv"
    test_file = "data/test.csv"

    # TabNet model
    tabnet_forest_covertype = tabnet_model.TabNet(
        columns=data_helper_covertype.get_columns(),
        num_features=data_helper_covertype.num_features,
        feature_dim=128,
        output_dim=64,
        num_decision_steps=6,
        relaxation_factor=1.5,
        batch_momentum=0.7,
        virtual_batch_size=512,
        num_classes=data_helper_covertype.num_classes)

    column_names = sorted(data_helper_covertype.feature_columns)
    print("Ordered column names, corresponding to the indexing in "
          "Tensorboard visualization")
    for fi in range(len(column_names)):
        print(str(fi) + " : " + column_names[fi])

    # Training parameters
    max_steps = 10
    display_step = 5
    val_step = 5
    save_step = 5
    init_learning_rate = 0.02
    decay_every = 500
    decay_rate = 0.95
    batch_size = 512
    sparsity_loss_weight = 0.0001
    gradient_thresh = 2000.0

    # Input sampling
    train_batch = data_helper_covertype.input_fn(train_file,
                                                 num_epochs=100000,
                                                 shuffle=True,
                                                 batch_size=batch_size,
                                                 n_buffer=1,
                                                 n_parallel=1)
    val_batch = data_helper_covertype.input_fn(val_file,
                                               num_epochs=10000,
                                               shuffle=False,
                                               batch_size=batch_size,
                                               n_buffer=1,
                                               n_parallel=1)
    test_batch = data_helper_covertype.input_fn(test_file,
                                                num_epochs=10000,
                                                shuffle=False,
                                                batch_size=batch_size,
                                                n_buffer=1,
                                                n_parallel=1)

    train_iter = train_batch.make_initializable_iterator()
    val_iter = val_batch.make_initializable_iterator()
    test_iter = test_batch.make_initializable_iterator()

    feature_train_batch, label_train_batch = train_iter.get_next()
    feature_val_batch, label_val_batch = val_iter.get_next()
    feature_test_batch, label_test_batch = test_iter.get_next()

    # Define the model and losses
    encoded_train_batch, total_entropy = tabnet_forest_covertype.encoder(
        feature_train_batch, reuse=False, is_training=True)
    logits_orig_batch, _ = tabnet_forest_covertype.classify(
        encoded_train_batch, reuse=False)
    softmax_orig_key_op = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits_orig_batch, labels=label_train_batch))
    train_loss_op = softmax_orig_key_op + sparsity_loss_weight * total_entropy
    tf.summary.scalar("Total loss", train_loss_op)

    # Optimization step
    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.train.exponential_decay(init_learning_rate,
                                               global_step=global_step,
                                               decay_steps=decay_every,
                                               decay_rate=decay_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        gvs = optimizer.compute_gradients(train_loss_op)
        capped_gvs = [(tf.clip_by_value(grad, -gradient_thresh,
                                        gradient_thresh), var)
                      for grad, var in gvs]
        train_op = optimizer.apply_gradients(capped_gvs,
                                             global_step=global_step)

    # Model evaluation
    # Validation performance
    encoded_val_batch, _ = tabnet_forest_covertype.encoder(feature_val_batch,
                                                           reuse=True,
                                                           is_training=True)
    _, prediction_val = tabnet_forest_covertype.classify(encoded_val_batch,
                                                         reuse=True)
    predicted_labels = tf.cast(tf.argmax(prediction_val, 1), dtype=tf.int32)
    val_eq_op = tf.equal(predicted_labels, label_val_batch)
    val_acc_op = tf.reduce_mean(tf.cast(val_eq_op, dtype=tf.float32))
    tf.summary.scalar("Val accuracy", val_acc_op)

    # Test performance
    encoded_test_batch, _ = tabnet_forest_covertype.encoder(
        feature_test_batch, reuse=True, is_training=True)
    _, prediction_test = tabnet_forest_covertype.classify(encoded_test_batch,
                                                          reuse=True)
    predicted_labels = tf.cast(tf.argmax(prediction_test, 1), dtype=tf.int32)
    test_eq_op = tf.equal(predicted_labels, label_test_batch)
    test_acc_op = tf.reduce_mean(tf.cast(test_eq_op, dtype=tf.float32))
    tf.summary.scalar("Test accuracy", test_acc_op)

    # Training setup
    model_name = "tabnet_forest_covertype_model"
    init = tf.global_variables_initializer()
    init_local = tf.local_variables_initializer()
    init_table = tf.tables_initializer(name="Initialize_all_tables")
    saver = tf.train.Saver()
    summaries = tf.summary.merge_all()

    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter("./tflog/" + model_name,
                                               sess.graph)
        sess.run(init)
        sess.run(init_local)
        sess.run(init_table)
        sess.run(train_iter.initializer)
        sess.run(val_iter.initializer)
        sess.run(test_iter.initializer)

        for step in range(1, max_steps + 1):
            if step % display_step == 0:
                _, train_loss, merged_summary = sess.run(
                    [train_op, train_loss_op, summaries])
                summary_writer.add_summary(merged_summary, step)
                print("Step " + str(step) + " , Training Loss = " +
                      "{:.4f}".format(train_loss))
            else:
                _ = sess.run(train_op)

            if step % val_step == 0:
                merged_summary, val_acc, _ = sess.run(
                    [summaries, val_acc_op, test_acc_op])
                print("Step " + str(step) + " , Val Accuracy = " +
                      "{:.4f}".format(val_acc))
                summary_writer.add_summary(merged_summary, step)

            if step % save_step == 0:
                saver.save(sess, "./checkpoints/" + model_name + ".ckpt")
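The training loop above clips each gradient element-wise with tf.clip_by_value before apply_gradients. A minimal, self-contained sketch of that idiom (the variable and loss here are illustrative, not from the TabNet script):

import tensorflow as tf  # assumes a TF1.x environment, like the code above

w = tf.Variable([1.0, -3.0])
loss = tf.reduce_sum(tf.square(w))
opt = tf.train.GradientDescentOptimizer(0.1)
gvs = opt.compute_gradients(loss)
# Clip each gradient element-wise into [-1, 1] before applying it;
# None gradients (for unused variables) are skipped.
capped = [(tf.clip_by_value(g, -1.0, 1.0), v) for g, v in gvs if g is not None]
train_op = opt.apply_gradients(capped)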
def prepare_processing_graph(self, model_settings):
    """Builds a TensorFlow graph to apply the input distortions.

    Creates a graph that takes a foreground audio clip, scales the volume,
    shifts it in time, adds in background noise, calculates a spectrogram,
    and then builds an MFCC fingerprint from that.

    This must be called with an active TensorFlow session running, and it
    creates multiple placeholder inputs, and one output:

      - foreground_data_placeholder_: PCM sample data of the main clip.
      - foreground_volume_placeholder_: How loud the main clip should be.
      - time_shift_padding_placeholder_: Where to pad the clip.
      - time_shift_offset_placeholder_: How much to move the clip in time.
      - background_data_placeholder_: PCM sample data for background noise.
      - background_volume_placeholder_: Loudness of mixed-in background.
      - mfcc_: Output 2D fingerprint of processed audio.

    Args:
      model_settings: Information about the current model being trained.
    """
    desired_samples = model_settings['desired_samples']
    channel_count = model_settings['channel_count']
    sample_rate = model_settings['sample_rate']
    self.foreground_data_placeholder_ = tf.placeholder(
        tf.float32, [desired_samples, channel_count])
    # Allow the audio sample's volume to be adjusted.
    self.foreground_volume_placeholder_ = tf.placeholder(tf.float32, [])
    scaled_foreground = tf.multiply(self.foreground_data_placeholder_,
                                    self.foreground_volume_placeholder_)
    # Shift the sample's start position, and pad any gaps with zeros.
    self.time_shift_padding_placeholder_ = tf.placeholder(tf.int32, [2, 2])
    self.time_shift_offset_placeholder_ = tf.placeholder(tf.int32, [2])
    padded_foreground = tf.pad(scaled_foreground,
                               self.time_shift_padding_placeholder_,
                               mode='CONSTANT')
    sliced_foreground = tf.slice(padded_foreground,
                                 self.time_shift_offset_placeholder_,
                                 [desired_samples, -1])
    # Mix in background noise.
    self.background_data_placeholder_ = tf.placeholder(
        tf.float32, [desired_samples, channel_count])
    self.background_volume_placeholder_ = tf.placeholder(tf.float32, [])
    background_mul = tf.multiply(self.background_data_placeholder_,
                                 self.background_volume_placeholder_)
    background_add = tf.add(background_mul, sliced_foreground)
    background_clamp = tf.clip_by_value(background_add, -1.0, 1.0)
    # Run the spectrogram and MFCC ops to get a 2D 'fingerprint' of the audio.
    self.waveform_ = background_clamp
    spectrograms = []
    for ichannel in range(channel_count):
        spectrograms.append(
            audio_ops.audio_spectrogram(
                tf.slice(background_clamp, [0, ichannel], [-1, 1]),
                window_size=model_settings['window_size_samples'],
                stride=model_settings['window_stride_samples'],
                magnitude_squared=True))
    self.spectrogram_ = tf.stack(spectrograms, -1)
    mfccs = []
    for ichannel in range(channel_count):
        mfccs.append(
            audio_ops.mfcc(
                spectrograms[ichannel],
                sample_rate,
                upper_frequency_limit=model_settings['sample_rate'] // 2,
                filterbank_channel_count=model_settings[
                    'filterbank_channel_count'],
                dct_coefficient_count=model_settings[
                    'dct_coefficient_count']))
    self.mfcc_ = tf.stack(mfccs, -1)
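A hedged sketch of how this graph might be driven at runtime; `processor`, `sess`, and the zero-filled buffers below are illustrative assumptions, not part of the original class:

# Hypothetical driver for prepare_processing_graph; `processor`, `sess`, and
# `model_settings` are assumed to come from the surrounding training script.
import numpy as np

desired_samples = model_settings['desired_samples']
channel_count = model_settings['channel_count']
silence = np.zeros((desired_samples, channel_count), dtype=np.float32)

mfcc = sess.run(
    processor.mfcc_,
    feed_dict={
        processor.foreground_data_placeholder_: silence,
        processor.foreground_volume_placeholder_: 1.0,
        processor.time_shift_padding_placeholder_: [[0, 0], [0, 0]],
        processor.time_shift_offset_placeholder_: [0, 0],
        processor.background_data_placeholder_: silence,
        processor.background_volume_placeholder_: 0.0,
    })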
def bilinear_interp(im, x, y, name):
    """Perform bilinear sampling on im given x, y coordinates.

    This function implements the differentiable sampling mechanism with
    bilinear kernel introduced in https://arxiv.org/abs/1506.02025,
    equation (5).

    x, y are tensors specifying normalized coordinates in [-1, 1] to sample
    from im. (-1, -1) means the (0, 0) coordinate in im; (1, 1) means the
    bottom-right pixel.

    Args:
      im: Tensor of size [batch_size, height, width, depth]
      x: Tensor of size [batch_size, height, width, 1]
      y: Tensor of size [batch_size, height, width, 1]
      name: String name for this op.

    Returns:
      Tensor of size [batch_size, height, width, depth]
    """
    with tf.variable_scope(name):
        x = tf.reshape(x, [-1])
        y = tf.reshape(y, [-1])

        # Constants.
        num_batch = tf.shape(im)[0]
        _, height, width, channels = im.get_shape().as_list()

        x = tf.to_float(x)
        y = tf.to_float(y)
        height_f = tf.cast(height, 'float32')
        width_f = tf.cast(width, 'float32')
        zero = tf.constant(0, dtype=tf.int32)
        max_x = tf.cast(tf.shape(im)[2] - 1, 'int32')
        max_y = tf.cast(tf.shape(im)[1] - 1, 'int32')
        x = (x + 1.0) * (width_f - 1.0) / 2.0
        y = (y + 1.0) * (height_f - 1.0) / 2.0

        # Sampling.
        x0 = tf.cast(tf.floor(x), 'int32')
        x1 = x0 + 1
        y0 = tf.cast(tf.floor(y), 'int32')
        y1 = y0 + 1

        x0 = tf.clip_by_value(x0, zero, max_x)
        x1 = tf.clip_by_value(x1, zero, max_x)
        y0 = tf.clip_by_value(y0, zero, max_y)
        y1 = tf.clip_by_value(y1, zero, max_y)

        dim2 = width
        dim1 = width * height

        # Create base index.
        base = tf.range(num_batch) * dim1
        base = tf.reshape(base, [-1, 1])
        base = tf.tile(base, [1, height * width])
        base = tf.reshape(base, [-1])

        base_y0 = base + y0 * dim2
        base_y1 = base + y1 * dim2
        idx_a = base_y0 + x0
        idx_b = base_y1 + x0
        idx_c = base_y0 + x1
        idx_d = base_y1 + x1

        # Use indices to look up pixels.
        im_flat = tf.reshape(im, tf.stack([-1, channels]))
        im_flat = tf.to_float(im_flat)
        pixel_a = tf.gather(im_flat, idx_a)
        pixel_b = tf.gather(im_flat, idx_b)
        pixel_c = tf.gather(im_flat, idx_c)
        pixel_d = tf.gather(im_flat, idx_d)

        # Interpolate the values.
        x1_f = tf.to_float(x1)
        y1_f = tf.to_float(y1)

        wa = tf.expand_dims((x1_f - x) * (y1_f - y), 1)
        wb = tf.expand_dims((x1_f - x) * (1.0 - (y1_f - y)), 1)
        wc = tf.expand_dims((1.0 - (x1_f - x)) * (y1_f - y), 1)
        wd = tf.expand_dims((1.0 - (x1_f - x)) * (1.0 - (y1_f - y)), 1)

        output = tf.add_n(
            [wa * pixel_a, wb * pixel_b, wc * pixel_c, wd * pixel_d])
        output = tf.reshape(
            output, shape=tf.stack([num_batch, height, width, channels]))
        return output
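As a sanity check, sampling with an identity grid should reproduce the input image. A minimal sketch assuming a TF1 session; the grid construction below is illustrative, not part of the original code:

import numpy as np
import tensorflow as tf  # TF1-style API, matching the snippet above

# Build a normalized identity grid: x, y in [-1, 1] covering the image.
batch, height, width = 1, 4, 5
xs, ys = np.meshgrid(np.linspace(-1.0, 1.0, width),
                     np.linspace(-1.0, 1.0, height))
x = np.tile(xs[None, :, :, None], [batch, 1, 1, 1]).astype(np.float32)
y = np.tile(ys[None, :, :, None], [batch, 1, 1, 1]).astype(np.float32)
im = np.random.rand(batch, height, width, 3).astype(np.float32)

out = bilinear_interp(tf.constant(im), tf.constant(x), tf.constant(y),
                      name='identity_warp')
with tf.Session() as sess:
    np.testing.assert_allclose(sess.run(out), im, atol=1e-5)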
def LogitsFromProb(prob):
    return tf.log(tf.clip_by_value(prob, 1e-12, 1.0))
def construct_model(self, input_tensors=None, prefix='metatrain_'):
    # a: training data for inner gradient, b: test data for meta gradient
    if input_tensors is None:
        self.inputa = tf.placeholder(tf.float32)
        self.inputb = tf.placeholder(tf.float32)
        self.labela = tf.placeholder(tf.float32)
        self.labelb = tf.placeholder(tf.float32)
    else:
        self.inputa = input_tensors['inputa']
        self.inputb = input_tensors['inputb']
        self.labela = input_tensors['labela']
        self.labelb = input_tensors['labelb']

    with tf.variable_scope('model', reuse=None) as training_scope:
        if 'weights' in dir(self):
            training_scope.reuse_variables()
            weights = self.weights
        else:
            # Define the weights
            self.weights = weights = self.construct_weights()

        # outputbs[i] and lossesb[i] are the output and loss after i+1
        # gradient updates
        lossesa, outputas, lossesb, outputbs = [], [], [], []
        accuraciesa, accuraciesb = [], []
        num_updates = max(self.test_num_updates, FLAGS.num_updates)
        outputbs = [[]] * num_updates
        lossesb = [[]] * num_updates
        accuraciesb = [[]] * num_updates

        def task_metalearn(inp, reuse=True):
            """Perform gradient descent for one task in the meta-batch."""
            inputa, inputb, labela, labelb = inp
            task_outputbs, task_lossesb = [], []
            if self.classification:
                task_accuraciesb = []

            task_outputa = self.forward(
                inputa, weights, reuse=reuse)  # only reuse on the first iter
            task_lossa = self.loss_func(task_outputa, labela)

            grads = tf.gradients(task_lossa, list(weights.values()))
            if FLAGS.stop_grad:
                grads = [tf.stop_gradient(grad) for grad in grads]
            gradients = dict(zip(weights.keys(), grads))
            fast_weights = dict(
                zip(weights.keys(), [
                    weights[key] - self.update_lr * gradients[key]
                    for key in weights.keys()
                ]))
            output = self.forward(inputb, fast_weights, reuse=True)
            task_outputbs.append(output)
            task_lossesb.append(self.loss_func(output, labelb))

            for j in range(num_updates - 1):
                loss = self.loss_func(
                    self.forward(inputa, fast_weights, reuse=True), labela)
                grads = tf.gradients(loss, list(fast_weights.values()))
                if FLAGS.stop_grad:
                    grads = [tf.stop_gradient(grad) for grad in grads]
                gradients = dict(zip(fast_weights.keys(), grads))
                fast_weights = dict(
                    zip(fast_weights.keys(), [
                        fast_weights[key] - self.update_lr * gradients[key]
                        for key in fast_weights.keys()
                    ]))
                output = self.forward(inputb, fast_weights, reuse=True)
                task_outputbs.append(output)
                task_lossesb.append(self.loss_func(output, labelb))

            task_output = [
                task_outputa, task_outputbs, task_lossa, task_lossesb
            ]

            if self.classification:
                task_accuracya = contrib_metrics.accuracy(
                    tf.argmax(tf.nn.softmax(task_outputa), 1),
                    tf.argmax(labela, 1))
                for j in range(num_updates):
                    task_accuraciesb.append(
                        contrib_metrics.accuracy(
                            tf.argmax(tf.nn.softmax(task_outputbs[j]), 1),
                            tf.argmax(labelb, 1)))
                task_output.extend([task_accuracya, task_accuraciesb])

            return task_output

        if FLAGS.norm != 'None':
            # To initialize the batch norm vars; might want to combine this
            # and not run idx 0 twice.
            unused = task_metalearn(
                (self.inputa[0], self.inputb[0], self.labela[0],
                 self.labelb[0]), False)

        out_dtype = [
            tf.float32, [tf.float32] * num_updates, tf.float32,
            [tf.float32] * num_updates
        ]
        if self.classification:
            out_dtype.extend([tf.float32, [tf.float32] * num_updates])
        result = tf.map_fn(task_metalearn,
                           elems=(self.inputa, self.inputb, self.labela,
                                  self.labelb),
                           dtype=out_dtype,
                           parallel_iterations=FLAGS.meta_batch_size)
        if self.classification:
            outputas, outputbs, lossesa, lossesb, accuraciesa, accuraciesb = result
        else:
            outputas, outputbs, lossesa, lossesb = result

    ## Performance & Optimization
    if 'train' in prefix:
        self.total_loss1 = total_loss1 = tf.reduce_sum(
            lossesa) / tf.to_float(FLAGS.meta_batch_size)
        self.total_losses2 = total_losses2 = [
            tf.reduce_sum(lossesb[j]) / tf.to_float(FLAGS.meta_batch_size)
            for j in range(num_updates)
        ]
        # after the map_fn
        self.outputas, self.outputbs = outputas, outputbs
        if self.classification:
            self.total_accuracy1 = total_accuracy1 = tf.reduce_sum(
                accuraciesa) / tf.to_float(FLAGS.meta_batch_size)
            self.total_accuracies2 = total_accuracies2 = [
                tf.reduce_sum(accuraciesb[j]) /
                tf.to_float(FLAGS.meta_batch_size)
                for j in range(num_updates)
            ]
        self.pretrain_op = tf.train.AdamOptimizer(
            self.meta_lr).minimize(total_loss1)

        if FLAGS.metatrain_iterations > 0:
            optimizer = tf.train.AdamOptimizer(self.meta_lr)
            self.gvs = gvs = optimizer.compute_gradients(
                self.total_losses2[FLAGS.num_updates - 1])
            if FLAGS.datasource == 'miniimagenet' or FLAGS.datasource == 'dclaw':
                gvs = [(tf.clip_by_value(grad, -10, 10), var)
                       for grad, var in gvs]
            self.metatrain_op = optimizer.apply_gradients(gvs)
    else:
        self.metaval_total_loss1 = total_loss1 = tf.reduce_sum(
            lossesa) / tf.to_float(FLAGS.meta_batch_size)
        self.metaval_total_losses2 = total_losses2 = [
            tf.reduce_sum(lossesb[j]) / tf.to_float(FLAGS.meta_batch_size)
            for j in range(num_updates)
        ]
        if self.classification:
            self.metaval_total_accuracy1 = total_accuracy1 = tf.reduce_sum(
                accuraciesa) / tf.to_float(FLAGS.meta_batch_size)
            self.metaval_total_accuracies2 = total_accuracies2 = [
                tf.reduce_sum(accuraciesb[j]) /
                tf.to_float(FLAGS.meta_batch_size)
                for j in range(num_updates)
            ]

    ## Summaries
    tf.summary.scalar(prefix + 'Pre-update loss', total_loss1)
    if self.classification:
        tf.summary.scalar(prefix + 'Pre-update accuracy', total_accuracy1)
    for j in range(num_updates):
        tf.summary.scalar(prefix + 'Post-update loss, step ' + str(j + 1),
                          total_losses2[j])
        if self.classification:
            tf.summary.scalar(
                prefix + 'Post-update accuracy, step ' + str(j + 1),
                total_accuracies2[j])
def scale_values(im):
    # Note: `hi` and `lo` are free variables captured from the enclosing scope.
    scale = 255.0 / (hi - lo)
    offset = -lo * scale
    im = tf.cast(im, tf.float32) * scale + offset
    im = tf.clip_by_value(im, 0.0, 255.0)
    return tf.cast(im, tf.uint8)
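Since `hi` and `lo` are free variables here, a self-contained variant makes the contract explicit. A minimal sketch; the parameterized signature is an assumption, not the original API:

import tensorflow as tf  # TF1-style API

def scale_values_explicit(im, lo, hi):
    """Linearly map [lo, hi] to [0, 255] and clip; illustrative variant."""
    scale = 255.0 / (hi - lo)
    offset = -lo * scale
    im = tf.cast(im, tf.float32) * scale + offset
    return tf.cast(tf.clip_by_value(im, 0.0, 255.0), tf.uint8)

# e.g. scale_values_explicit(depth_map, lo=0.0, hi=10.0)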
def _safe_log(x, eps=1e-8):
    return tf.log(tf.clip_by_value(x, eps, 1.0))
# Placeholders for the network. Shapes include a leading batch dimension so
# that tf.matmul operates on 2-D tensors.
L1 = tf.placeholder('float', [None, input_nodes])
L2 = tf.placeholder('float', [None, hidden_nodes])  # unused once layers are wired directly below
L3 = tf.placeholder('float', [None, output_nodes])  # target labels

# initialize starting weights and biases with random normal distribution
W12 = tf.Variable(
    tf.random.normal([input_nodes, hidden_nodes], stddev=init_stddev))
W23 = tf.Variable(
    tf.random.normal([hidden_nodes, output_nodes], stddev=init_stddev))
b2 = tf.Variable(tf.random.normal([hidden_nodes], stddev=init_stddev))
b3 = tf.Variable(tf.random.normal([output_nodes], stddev=init_stddev))

# calculate hidden outputs from the inputs, and final outputs from the hidden
# layer; relu and softmax are 'squishification' functions
# matmul applies weights; add applies biases
hout = tf.nn.relu(tf.add(tf.matmul(L1, W12), b2))
oout = tf.nn.softmax(tf.add(tf.matmul(hout, W23), b3))

# clip values to prevent a log(0) error and infinite log(1) chains
# use the cross-entropy function to calculate the cost
# the optimizer back-propagates cost using the earlier-defined learning rate
oout = tf.clip_by_value(oout, 1e-10, 0.99999999)
costf = -tf.reduce_mean(
    tf.reduce_sum(L3 * tf.log(oout) + (1.0 - L3) * tf.log(1.0 - oout),
                  axis=1))

# tf optimizer
# checks each prediction
# records accuracy
# runs session
def __init__(self, lr, batch_size, dimension, util_train, util_test,
             campaign, reg_lambda, sigma):
    # hyperparameters
    self.lr = lr
    self.batch_size = batch_size
    self.util_train = util_train
    self.util_test = util_test
    self.reg_lambda = reg_lambda
    self.sigma = sigma
    self.emb_size = 20
    self.train_data_amt = util_train.get_data_amt()
    self.test_data_amt = util_test.get_data_amt()

    # output dir
    model_name = "{}_{}_{}_{}".format(self.lr, self.reg_lambda,
                                      self.batch_size, self.sigma)
    self.output_dir = "output/deephit/{}/{}/".format(campaign, model_name)
    if not os.path.exists(self.output_dir):
        os.makedirs(self.output_dir)

    # reset graph
    tf.reset_default_graph()

    # field params
    self.field_sizes = self.util_train.feat_sizes
    self.field_num = len(self.field_sizes)

    # placeholders
    self.X = [
        tf.sparse_placeholder(tf.float64) for i in range(0, self.field_num)
    ]
    self.z = tf.placeholder(tf.float64)
    self.b = tf.placeholder(tf.float64)
    self.y = tf.placeholder(tf.float64)

    # embedding layer
    self.var_map = {}
    # for truncated
    self.var_map['embed_0'] = tf.Variable(
        tf.truncated_normal([self.field_sizes[0], 1], dtype=tf.float64))
    for i in range(1, self.field_num):
        self.var_map['embed_%d' % i] = tf.Variable(
            tf.truncated_normal([self.field_sizes[i], self.emb_size],
                                dtype=tf.float64))

    # after embedding
    w0 = [self.var_map['embed_%d' % i] for i in range(self.field_num)]
    self.dense_input = tf.concat([
        tf.sparse_tensor_dense_matmul(self.X[i], w0[i])
        for i in range(self.field_num)
    ], 1)

    # shared network
    self.hidden1 = tf.Variable(initial_value=tf.truncated_normal(
        shape=[(self.field_num - 1) * self.emb_size + 1, HIDDEN_SIZE1],
        dtype=tf.float64), name='h1')
    self.out1 = tf.Variable(initial_value=tf.truncated_normal(
        shape=[HIDDEN_SIZE1, OUT_SIZE1], dtype=tf.float64), name='o1')
    self.hidden2 = tf.Variable(initial_value=tf.truncated_normal(
        shape=[OUT_SIZE1, HIDDEN_SIZE2], dtype=tf.float64), name='h2')
    self.out2 = tf.Variable(initial_value=tf.truncated_normal(
        shape=[HIDDEN_SIZE2, OUT_SIZE2], dtype=tf.float64), name='o2')

    # cause-specific network
    self.hidden1_val = tf.nn.relu(tf.matmul(self.dense_input, self.hidden1))
    self.out1_val = tf.sigmoid(tf.matmul(self.hidden1_val, self.out1))
    self.hidden2_val = tf.nn.relu(tf.matmul(self.out1_val, self.hidden2))
    self.out2_val = tf.sigmoid(tf.matmul(self.hidden2_val, self.out2))

    # p_z and w_b
    self.p = tf.nn.softmax(self.out2_val)
    self.w = tf.cumsum(self.p, exclusive=True, axis=1)

    idx_z = tf.stack([
        tf.reshape(tf.range(tf.shape(self.z)[0]), (-1, 1)),
        tf.cast(self.z - 1, tf.int32)
    ], axis=-1)
    idx_b = tf.stack([
        tf.reshape(tf.range(tf.shape(self.b)[0]), (-1, 1)),
        tf.cast(self.b - 1, tf.int32)
    ], axis=-1)

    self.pz = tf.gather_nd(self.p, idx_z)
    self.wb = tf.gather_nd(self.w, idx_b)
    self.wz = tf.gather_nd(self.w, idx_z)

    # loss and train step
    self.loss1 = -tf.reduce_sum(
        tf.log(tf.clip_by_value(self.pz, 1e-8, 1.0)) * self.y)
    self.loss2 = -tf.reduce_sum(
        tf.log(tf.clip_by_value(1 - self.wb, 1e-8, 1.0)) * (1 - self.y))
    self.reg_loss = tf.nn.l2_loss(self.hidden1[1:,]) + \
        tf.nn.l2_loss(self.hidden2[1:,]) + \
        tf.nn.l2_loss(self.out1[1:,]) + \
        tf.nn.l2_loss(self.out2[1:,])

    # get ranking loss
    self.w_of_pair = tf.transpose(
        tf.nn.embedding_lookup(tf.transpose(self.w),
                               tf.cast(self.z[:, 0] - 1, tf.int32)))
    self.w_of_self = tf.reshape(
        tf.tile(tf.reshape(self.wz, (self.batch_size,)), [self.batch_size]),
        (self.batch_size, self.batch_size))
    self.win_label = tf.reshape(
        tf.tile(tf.reshape(self.y, (self.batch_size,)), [self.batch_size]),
        (self.batch_size, self.batch_size))
    self.delta = self.w_of_self - self.w_of_pair
    self.candidate = tf.exp(-self.delta / self.sigma)
    self.rank_loss = tf.reduce_sum(
        tf.matrix_band_part(self.candidate, -1, 0) * self.win_label)

    self.loss = self.loss1 + self.loss2 + \
        self.reg_lambda * self.reg_loss + self.rank_loss
    self.optimizer = tf.train.GradientDescentOptimizer(self.lr)
    self.train_step = self.optimizer.minimize(self.loss)

    # session initialization
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=config)
    tf.global_variables_initializer().run(session=self.sess)
def body(i, old_adv_x, old_loss, labels=labels):
    """Find example with max loss value amongst batch of perturbations."""
    deltas = tf.random_uniform(deltas_shape)

    # generate uniform samples from the l^p unit ball interior
    if self.ord == np.inf:
        deltas *= 2. * self.eps
        deltas -= self.eps
    elif self.ord == 1:
        # ref: https://mathoverflow.net/questions/9185/how-to-generate-random-points-in-ell-p-balls pylint: disable=line-too-long
        exp = -tf.log(deltas)
        shift = -tf.log(tf.random_uniform(deltas_shape[:2]))
        norm = tf.reduce_sum(tf.abs(exp),
                             list(range(2, len(deltas_shape) - 2)))
        scale = tf.reshape(shift + norm,
                           deltas_shape[:2] + [1] * (len(deltas_shape) - 2))
        deltas = exp / scale
    elif self.ord == 2:
        # ref: https://blogs.sas.com/content/iml/2016/04/06/generate-points-uniformly-in-ball.html pylint: disable=line-too-long
        dims = tf.cast(tf.reduce_prod(deltas_shape[2:]), tf.float32)
        deltas = tf.pow(deltas, 1. / dims)
        normal = tf.random_normal(deltas_shape)
        normal /= tf.sqrt(
            tf.reduce_sum(normal**2,
                          axis=list(range(2, len(deltas_shape) - 2)),
                          keepdims=True))
        deltas *= normal
    else:
        raise NotImplementedError('Only L-inf, L1 and L2 norms are '
                                  'currently implemented.')

    adv_x = tf.expand_dims(x, 1) + deltas
    labels = tf.expand_dims(labels, 1)
    labels = tf.tile(labels, [1, self.num_samples, 1])

    if (self.clip_min is not None) and (self.clip_max is not None):
        adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

    adv_x_r = tf.reshape(adv_x, [-1] + deltas_shape[2:])
    preds = self.model.get_probs(adv_x_r)
    preds_shape = preds.shape.as_list()
    preds = tf.reshape(preds, deltas_shape[:2] + preds_shape[1:])

    if labels is None:
        # Using model predictions as ground truth to avoid label leaking
        preds_max = tf.reduce_max(preds, -1, keep_dims=True)
        labels = tf.to_float(tf.equal(preds, preds_max))
        labels = tf.stop_gradient(labels)
    labels = labels / tf.reduce_sum(labels, -1, keep_dims=True)

    # Compute loss
    loss = utils_tf.model_loss(labels, preds, mean=False)
    if self.y_target is not None:
        loss = -loss

    # find the maximum loss value
    input_idx = tf.one_hot(tf.argmax(loss, axis=1), self.num_samples, axis=1)
    loss = tf.reduce_sum(loss * input_idx, axis=1)
    input_idx = tf.reshape(input_idx,
                           deltas_shape[:2] + [1] * (len(deltas_shape) - 2))
    adv_x = tf.reduce_sum(adv_x * input_idx, axis=1)

    condition = tf.greater(old_loss, loss)
    new_loss = tf.where(condition, old_loss, loss)
    new_adv_x = tf.where(condition, old_adv_x, adv_x)
    print(new_loss, new_adv_x)

    return i + 1, new_adv_x, new_loss
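The L-inf branch above maps uniform [0, 1) draws into [-eps, eps) with a scale and a shift. A tiny numpy sketch of the same arithmetic:

import numpy as np

eps = 0.3
u = np.random.uniform(size=(4,))  # samples in [0, 1)
deltas = 2.0 * eps * u - eps      # now uniform in [-eps, eps)
assert np.all(np.abs(deltas) <= eps)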
def observe(self, x):
    self.n += 1.
    last_mean = tf.identity(self.mean)
    self.mean += (x - self.mean) / self.n
    self.mean_diff += (x - last_mean) * (x - self.mean)
    self.var = tf.clip_by_value(self.mean_diff / self.n,
                                clip_value_min=1e-2,
                                clip_value_max=1000000000)
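This is Welford's online mean/variance recurrence, with the variance clipped away from zero. A plain-Python sketch of the same recurrence (illustrative, not the original class):

# Welford-style running statistics in plain Python; mirrors observe() above.
class RunningStats:
    def __init__(self):
        self.n = 0.0
        self.mean = 0.0
        self.mean_diff = 0.0  # running sum of squared deviations (M2)
        self.var = 1e-2

    def observe(self, x):
        self.n += 1.0
        last_mean = self.mean
        self.mean += (x - last_mean) / self.n
        self.mean_diff += (x - last_mean) * (x - self.mean)
        # Clip to keep the variance in a sane range, as above.
        self.var = min(max(self.mean_diff / self.n, 1e-2), 1e9)

stats = RunningStats()
for v in [1.0, 2.0, 3.0]:
    stats.observe(v)
# stats.mean == 2.0; population variance 2/3, floor is the 1e-2 clip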
def _compute_inner_update_onsbet(self, var, grad):
    update_ops = []

    eta = tf.cast(self.eta, var.dtype.base_dtype)
    betting_domain = tf.cast(self.betting_domain, var.dtype.base_dtype)

    wealth = self.get_slot(var, INNER_WEALTH)
    betting_fraction = self.get_slot(var, OUTER_BETTING_FRACTION)
    inner_betting_fraction = self.get_slot(var, INNER_BETTING_FRACTION)
    sum_grad_squared = self.get_slot(var, INNER_SUM_GRAD_SQUARED)
    inner_maximum_gradient = self.get_slot(var, INNER_MAXIMUM_GRADIENT)

    inner_maximum_gradient_updated = self._assign(
        inner_maximum_gradient,
        tf.maximum(inner_maximum_gradient, tf.abs(grad)))
    update_ops.append(inner_maximum_gradient_updated)

    clipped_old_betting_fraction = tf.clip_by_value(betting_fraction,
                                                    -betting_domain,
                                                    betting_domain)

    # Process grad to respect truncation to [-betting_domain, betting_domain]
    truncated_grad = tf.where(
        tf.greater_equal(
            grad * (betting_fraction - clipped_old_betting_fraction), 0),
        grad, tf.zeros(tf.shape(grad)))

    wealth_delta = -betting_fraction * truncated_grad
    wealth_updated = self._assign_add(wealth, wealth_delta)
    update_ops.append(wealth_updated)

    # This is the gradient with respect to the betting fraction v
    # used by the ONS algorithm - a kind of "inner inner grad".
    # Heuristic: we also scale v_grad down by the inner maximum gradient so
    # as to make it "unitless". This is helpful because the learning rate
    # for ONS is proportional to sum v_grad**2, and so the scale of the
    # learning rate and of v_grad are unlikely to be properly matched
    # without this.
    if self.rescale_inner:
        v_grad = truncated_grad / (
            (1.0 - inner_betting_fraction * truncated_grad) *
            inner_maximum_gradient_updated)
    else:
        v_grad = truncated_grad / (
            (1.0 - inner_betting_fraction * truncated_grad))

    sum_grad_squared_updated = self._assign_add(sum_grad_squared,
                                                tf.square(v_grad))
    update_ops.append(sum_grad_squared_updated)

    new_inner_betting_fraction = inner_betting_fraction - eta * v_grad / (
        sum_grad_squared_updated)
    new_inner_betting_fraction = tf.clip_by_value(new_inner_betting_fraction,
                                                  -betting_domain,
                                                  betting_domain)
    inner_betting_fraction_updated = self._assign(
        inner_betting_fraction, new_inner_betting_fraction)
    update_ops.append(inner_betting_fraction_updated)

    if self.output_summaries:
        mean_inner_betting_fraction_summary = tf.reduce_mean(
            tf.abs(inner_betting_fraction_updated))
        max_inner_betting_fraction_summary = tf.reduce_max(
            tf.abs(inner_betting_fraction_updated))
        inner_maximum_gradient_summary = tf.reduce_max(
            inner_maximum_gradient_updated)
        tf.summary.scalar(self._name + "/mean_inner_betting/" + var.name,
                          mean_inner_betting_fraction_summary)
        tf.summary.scalar(self._name + "/max_inner_betting/" + var.name,
                          max_inner_betting_fraction_summary)
        tf.summary.scalar(self._name + "/inner_maximum_gradient/" + var.name,
                          inner_maximum_gradient_summary)

    betting_fraction_updated = self._assign(
        betting_fraction, inner_betting_fraction_updated * wealth_updated)
    update_ops.append(betting_fraction_updated)

    clipped_betting_fraction = tf.clip_by_value(betting_fraction_updated,
                                                -betting_domain,
                                                betting_domain)

    return clipped_betting_fraction, tf.group(*update_ops)
def _compute_inner_update_scinol(self, var, grad):
    update_ops = []

    betting_domain = tf.cast(self.betting_domain, var.dtype.base_dtype)

    reward = self.get_slot(var, INNER_REWARD)
    betting_fraction = self.get_slot(var, OUTER_BETTING_FRACTION)
    sum_grad_squared = self.get_slot(var, INNER_SUM_GRAD_SQUARED)
    sum_grad = self.get_slot(var, INNER_SUM_GRAD)
    inner_maximum_gradient = self.get_slot(var, INNER_MAXIMUM_GRADIENT)

    # clip inner gradient to respect previous inner_maximum_gradient value
    # This introduces at most an additive constant overhead in the regret
    # since the inner betting fraction lies in a bounded domain.
    clipped_grad = tf.clip_by_value(grad, -inner_maximum_gradient,
                                    inner_maximum_gradient)

    with tf.control_dependencies([clipped_grad]):
        inner_maximum_gradient_updated = self._assign(
            inner_maximum_gradient,
            tf.maximum(inner_maximum_gradient, tf.abs(grad)))
        update_ops.append(inner_maximum_gradient_updated)

    clipped_old_betting_fraction = tf.clip_by_value(betting_fraction,
                                                    -betting_domain,
                                                    betting_domain)

    # Process grad to respect truncation to [-betting_domain, betting_domain]
    truncated_grad = tf.where(
        tf.greater_equal(
            clipped_grad * (betting_fraction - clipped_old_betting_fraction),
            0.0), clipped_grad, tf.zeros(tf.shape(clipped_grad)))

    reward_delta = -betting_fraction * truncated_grad
    reward_updated = self._assign_add(reward, reward_delta)
    update_ops.append(reward_updated)

    sum_grad_squared_updated = self._assign_add(sum_grad_squared,
                                                tf.square(truncated_grad))
    update_ops.append(sum_grad_squared_updated)

    sum_grad_updated = self._assign_add(sum_grad, truncated_grad)
    update_ops.append(sum_grad_updated)

    # The second term in this maximum, inner_maximum_gradient_updated /
    # self.eta, is a hack to force the betting fraction to not be too big
    # at first.
    scaling = tf.minimum(
        tf.rsqrt(sum_grad_squared_updated +
                 tf.square(inner_maximum_gradient_updated)),
        self.eta / inner_maximum_gradient_updated)
    theta = -sum_grad_updated * scaling

    # rescale inner flag is a hack that rescales the epsilon_v by the
    # maximum inner gradient.
    if self.rescale_inner:
        epsilon_scaling = inner_maximum_gradient_updated
    else:
        epsilon_scaling = 1.0

    inner_betting_fraction = tf.sign(theta) * tf.minimum(
        tf.abs(theta), 1.0) * scaling / 2.0
    new_betting_fraction = inner_betting_fraction * (
        reward_updated + epsilon_scaling * self.epsilon_v)

    betting_fraction_updated = self._assign(betting_fraction,
                                            new_betting_fraction)
    update_ops.append(betting_fraction_updated)

    clipped_betting_fraction = tf.clip_by_value(betting_fraction_updated,
                                                -betting_domain,
                                                betting_domain)

    if self.output_summaries:
        mean_unclipped_betting_fraction_summary = tf.reduce_mean(
            tf.abs(betting_fraction_updated))
        max_unclipped_betting_fraction_summary = tf.reduce_max(
            tf.abs(betting_fraction_updated))

        mean_clipped_betting_fraction_summary = tf.reduce_mean(
            tf.abs(clipped_betting_fraction))
        max_clipped_betting_fraction_summary = tf.reduce_max(
            tf.abs(clipped_betting_fraction))

        max_abs_gradient = tf.reduce_max(tf.abs(grad))
        max_truncated_grad = tf.reduce_max(tf.abs(truncated_grad))

        tf.summary.scalar(self._name + "/mean_unclipped_bet/" + var.name,
                          mean_unclipped_betting_fraction_summary)
        tf.summary.scalar(self._name + "/max_unclipped_bet/" + var.name,
                          max_unclipped_betting_fraction_summary)
        tf.summary.scalar(self._name + "/mean_clipped_bet/" + var.name,
                          mean_clipped_betting_fraction_summary)
        tf.summary.scalar(self._name + "/max_clipped_bet/" + var.name,
                          max_clipped_betting_fraction_summary)
        tf.summary.scalar(self._name + "/max_abs_inner_grad/" + var.name,
                          max_abs_gradient)
        tf.summary.scalar(
            self._name + "/max_abs_truncated_inner_grad/" + var.name,
            max_truncated_grad)

    return clipped_betting_fraction, tf.group(*update_ops)
def __call__(self,
             input_state,
             location_scale,
             prev_locations=None,
             is_training=False,
             policy="learned",
             sampling_stddev=1e-5):
    """Builds emission network.

    Args:
      input_state: 2-D Tensor of shape [batch, state dimensionality]
      location_scale: <= 1. and >= 0. the normalized location range
        [-location_scale, location_scale]
      prev_locations: if not None, add prev_locations to the currently
        proposed locations (i.e. use relative locations)
      is_training: (Boolean) to indicate training or inference modes.
      policy: (String) 'learned': uses learned policy, 'random': uses random
        policy, or 'center': uses center look policy.
      sampling_stddev: Sampling distribution standard deviation.

    Returns:
      locations: network output reflecting next location to look at
        (normalized to range [-location_scale, location_scale]).
        The image locations mapping to locs are as follows:
          (-1, -1): upper left corner.
          (-1, 1): upper right corner.
          (1, 1): lower right corner.
          (1, -1): lower left corner.
      endpoints: dictionary with activations at different layers.
    """
    if self.var_list:
        reuse = True
    else:
        reuse = False

    batch_size = input_state.shape.as_list()[0]
    tf.logging.info("BUILD Emission Network")
    endpoints = {}
    net = input_state

    # Fully connected layers.
    with tf.variable_scope("emission_network", reuse=reuse):
        net, endpoints_ = model_utils.build_fc_layers(
            net,
            self.num_units_fc_layers,
            activation=self.activation,
            regularizer=self.regularizer)
    endpoints.update(endpoints_)

    # Tanh output layer.
    with tf.variable_scope("emission_network/output", reuse=reuse):
        output, _ = model_utils.build_fc_layers(
            net, [self.location_dims],
            activation=tf.nn.tanh,
            regularizer=self.regularizer)

    # Scale the locations into the [-location_scale, location_scale] range.
    mean_locations = location_scale * output

    if prev_locations is not None:
        mean_locations = prev_locations + mean_locations

    if policy == "learned":
        endpoints["mean_locations"] = mean_locations
        if is_training:
            # At training time, sample a random location around the mean.
            locations = mean_locations + tf.random_normal(
                shape=(batch_size, self.location_dims),
                stddev=sampling_stddev)
            # Ensure the range [-location_scale, location_scale].
            locations = tf.clip_by_value(locations, -location_scale,
                                         location_scale)
            tf.logging.info("Sampling locations.")
            tf.logging.info(
                "====================================================")
        else:
            # At inference time, use the mean value for the location.
            locations = mean_locations
        locations = tf.stop_gradient(locations)
    elif policy == "random":
        # Use random policy for location.
        locations = tf.random_uniform(shape=(batch_size, self.location_dims),
                                      minval=-location_scale,
                                      maxval=location_scale)
        endpoints["mean_locations"] = mean_locations
    elif policy == "center":
        # Use center look policy.
        locations = tf.zeros(shape=(batch_size, self.location_dims))
        endpoints["mean_locations"] = mean_locations
    else:
        raise ValueError(
            "policy can be either 'learned', 'random', or 'center'")

    if not reuse:
        self.collect_variables()

    return locations, endpoints
def eval_op(batch, hparams, config_name):
    """Define an evaluation op.

    Args:
      batch: Batch produced by NSynthReader.
      hparams: Hyperparameters.
      config_name: Name of config module.

    Returns:
      eval_op: A complete evaluation op with summaries.
    """
    phase = not (hparams.mag_only or hparams.raw_audio)

    config = utils.get_module("baseline.models.ae_configs.%s" % config_name)

    if hparams.raw_audio:
        x = batch["audio"]
        # Add height and channel dims
        x = tf.expand_dims(tf.expand_dims(x, 1), -1)
    else:
        x = batch["spectrogram"]

    # Define the model
    with tf.name_scope("Model"):
        z = config.encode(x, hparams, is_training=False)
        xhat = config.decode(z, batch, hparams, is_training=False)

    # For interpolation
    tf.add_to_collection("x", x)
    tf.add_to_collection("pitch", batch["pitch"])
    tf.add_to_collection("z", z)
    tf.add_to_collection("xhat", xhat)

    total_loss = compute_mse_loss(x, xhat, hparams)

    # Define the metrics:
    names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
        "Loss": slim.metrics.mean(total_loss),
    })

    # Define the summaries
    for name, value in names_to_values.items():
        slim.summaries.add_scalar_summary(value, name, print_summary=True)

    # Interpolate
    with tf.name_scope("Interpolation"):
        xhat = config.decode(z, batch, hparams, reuse=True, is_training=False)

        # Linear interpolation
        z_shift_one_example = tf.concat([z[1:], z[:1]], 0)
        z_linear_half = (z + z_shift_one_example) / 2.0
        xhat_linear_half = config.decode(z_linear_half, batch, hparams,
                                         reuse=True, is_training=False)

        # Pitch shift
        pitch_plus_2 = tf.clip_by_value(batch["pitch"] + 2, 0, 127)
        pitch_minus_2 = tf.clip_by_value(batch["pitch"] - 2, 0, 127)

        batch["pitch"] = pitch_minus_2
        xhat_pitch_minus_2 = config.decode(z, batch, hparams,
                                           reuse=True, is_training=False)
        batch["pitch"] = pitch_plus_2
        xhat_pitch_plus_2 = config.decode(z, batch, hparams,
                                          reuse=True, is_training=False)

    utils.specgram_summaries(x, "Training Examples", hparams, phase=phase)
    utils.specgram_summaries(xhat, "Reconstructions", hparams, phase=phase)
    utils.specgram_summaries(
        x - xhat, "Difference", hparams, audio=False, phase=phase)
    utils.specgram_summaries(
        xhat_linear_half, "Linear Interp. 0.5", hparams, phase=phase)
    utils.specgram_summaries(xhat_pitch_plus_2, "Pitch +2", hparams,
                             phase=phase)
    utils.specgram_summaries(xhat_pitch_minus_2, "Pitch -2", hparams,
                             phase=phase)

    return list(names_to_updates.values())
def _update_critic_td3(self, obs, action, next_obs, reward, mask):
    """Updates parameters of td3 critic given samples from the batch.

    Args:
      obs: A tfe.Variable with a batch of observations.
      action: A tfe.Variable with a batch of actions.
      next_obs: A tfe.Variable with a batch of next observations.
      reward: A tfe.Variable with a batch of rewards.
      mask: A tfe.Variable with a batch of masks.
    """
    # Avoid using tensorflow random functions since it's impossible to get
    # the state of the random number generator used by TensorFlow.
    target_action_noise = np.random.normal(
        size=action.get_shape(), scale=self.policy_noise).astype('float32')
    target_action_noise = contrib_eager_python_tfe.Variable(
        target_action_noise)

    target_action_noise = tf.clip_by_value(target_action_noise,
                                           -self.policy_noise_clip,
                                           self.policy_noise_clip)

    noisy_action_targets = self.actor_target(next_obs) + target_action_noise

    clipped_noisy_action_targets = tf.clip_by_value(noisy_action_targets, -1,
                                                    1)

    if self.use_absorbing_state:
        # Starting from the goal state we can execute only non-actions.
        a_mask = tf.maximum(0, mask)
        q_next1, q_next2 = self.critic_target(
            next_obs, clipped_noisy_action_targets * a_mask)
        q_next = tf.reduce_min(tf.concat([q_next1, q_next2], -1),
                               -1,
                               keepdims=True)
        q_target = reward + self.discount * q_next
    else:
        q_next1, q_next2 = self.critic_target(next_obs,
                                              clipped_noisy_action_targets)
        q_next = tf.reduce_min(tf.concat([q_next1, q_next2], -1),
                               -1,
                               keepdims=True)
        q_target = reward + self.discount * mask * q_next

    with tf.GradientTape() as tape:
        q_pred1, q_pred2 = self.critic(obs, action)
        critic_loss = tf.losses.mean_squared_error(
            q_target, q_pred1) + tf.losses.mean_squared_error(
                q_target, q_pred2)

    grads = tape.gradient(critic_loss, self.critic.variables)
    self.critic_optimizer.apply_gradients(zip(grads, self.critic.variables),
                                          global_step=self.critic_step)

    if self.use_absorbing_state:
        with contrib_summary.record_summaries_every_n_global_steps(
                100, self.critic_step):
            a_mask = tf.maximum(0, -mask)
            if tf.reduce_sum(a_mask).numpy() > 0:
                contrib_summary.scalar('critic/absorbing_reward',
                                       tf.reduce_sum(reward * a_mask) /
                                       tf.reduce_sum(a_mask),
                                       step=self.critic_step)

    with contrib_summary.record_summaries_every_n_global_steps(
            100, self.critic_step):
        contrib_summary.scalar('critic/loss',
                               critic_loss,
                               step=self.critic_step)
def _solarize_add(image, addition=0, threshold=128):
    """If `pixel < threshold`, add `addition` to it and clip between 0 and 255."""
    threshold = tf.cast(threshold, image.dtype)
    added_image = tf.cast(image, tf.int32) + addition
    added_image = tf.cast(tf.clip_by_value(added_image, 0, 255), tf.uint8)
    return tf.where_v2(image < threshold, added_image, image)
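A quick numeric check of the behavior (hedged sketch assuming a TF1 session):

import numpy as np
import tensorflow as tf  # TF1-style API

img = tf.constant(np.array([[10, 120, 200]], dtype=np.uint8))
out = _solarize_add(img, addition=100, threshold=128)
with tf.Session() as sess:
    # [[110 220 200]]: 200 >= threshold, so it is left unchanged.
    print(sess.run(out))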
def __init__(self, *, policy, ob_space, ac_space, nbatch_act, nbatch_train,
             nsteps, ent_coef, vf_coef, max_grad_norm, mpi_rank_weight=1,
             comm=None, microbatch_size=None):
    self.sess = sess = get_session()

    if MPI is not None and comm is None:
        comm = MPI.COMM_WORLD

    with tf.variable_scope('ppo2_model', reuse=tf.AUTO_REUSE):
        # CREATE OUR THREE MODELS
        # act_model that is used for sampling
        act_model = policy(nbatch_act, 1, sess)

        # Train model for training
        if microbatch_size is None:
            train_model = policy(nbatch_train, nsteps, sess)
        else:
            train_model = policy(microbatch_size, nsteps, sess)

        # Eval model for ADR
        eval_model = policy(1, 1, sess)

    # CREATE THE PLACEHOLDERS
    self.A = A = train_model.pdtype.sample_placeholder([None])
    self.ADV = ADV = tf.placeholder(tf.float32, [None])
    self.R = R = tf.placeholder(tf.float32, [None])
    # Keep track of old actor
    self.OLDNEGLOGPAC = OLDNEGLOGPAC = tf.placeholder(tf.float32, [None])
    # Keep track of old critic
    self.OLDVPRED = OLDVPRED = tf.placeholder(tf.float32, [None])
    self.LR = LR = tf.placeholder(tf.float32, [])
    # Cliprange
    self.CLIPRANGE = CLIPRANGE = tf.placeholder(tf.float32, [])

    neglogpac = train_model.pd.neglogp(A)

    # Calculate the entropy
    # Entropy is used to improve exploration by limiting the premature
    # convergence to a suboptimal policy.
    entropy = tf.reduce_mean(train_model.pd.entropy())

    # CALCULATE THE LOSS
    # Total loss = Policy gradient loss - entropy * entropy coefficient +
    # Value coefficient * value loss

    # Clip the value to reduce variability during Critic training
    # Get the predicted value
    vpred = train_model.vf
    vpredclipped = OLDVPRED + tf.clip_by_value(train_model.vf - OLDVPRED,
                                               -CLIPRANGE, CLIPRANGE)
    # Unclipped value
    vf_losses1 = tf.square(vpred - R)
    # Clipped value
    vf_losses2 = tf.square(vpredclipped - R)

    vf_loss = .5 * tf.reduce_mean(tf.maximum(vf_losses1, vf_losses2))

    # Calculate ratio (pi current policy / pi old policy)
    ratio = tf.exp(OLDNEGLOGPAC - neglogpac)

    # Defining Loss = - J is equivalent to max J
    pg_losses = -ADV * ratio
    pg_losses2 = -ADV * tf.clip_by_value(ratio, 1.0 - CLIPRANGE,
                                         1.0 + CLIPRANGE)

    # Final PG loss
    pg_loss = tf.reduce_mean(tf.maximum(pg_losses, pg_losses2))
    approxkl = .5 * tf.reduce_mean(tf.square(neglogpac - OLDNEGLOGPAC))
    clipfrac = tf.reduce_mean(
        tf.to_float(tf.greater(tf.abs(ratio - 1.0), CLIPRANGE)))

    # Total loss
    loss = pg_loss - entropy * ent_coef + vf_loss * vf_coef

    # UPDATE THE PARAMETERS USING LOSS
    # 1. Get the model parameters
    params = tf.trainable_variables('ppo2_model')
    # 2. Build our trainer
    if comm is not None and comm.Get_size() > 1:
        self.trainer = MpiAdamOptimizer(comm,
                                        learning_rate=LR,
                                        mpi_rank_weight=mpi_rank_weight,
                                        epsilon=1e-5)
    else:
        self.trainer = tf.train.AdamOptimizer(learning_rate=LR, epsilon=1e-5)
    # 3. Calculate the gradients
    grads_and_var = self.trainer.compute_gradients(loss, params)
    grads, var = zip(*grads_and_var)

    if max_grad_norm is not None:
        # Clip the gradients (normalize)
        grads, _grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
    grads_and_var = list(zip(grads, var))
    # zip aggregates each gradient with its associated parameters
    # For instance zip(ABCD, xyza) => Ax, By, Cz, Da

    self.grads = grads
    self.var = var
    self._train_op = self.trainer.apply_gradients(grads_and_var)
    self.loss_names = [
        'policy_loss', 'value_loss', 'policy_entropy', 'approxkl', 'clipfrac'
    ]
    self.stats_list = [pg_loss, vf_loss, entropy, approxkl, clipfrac]

    self.train_model = train_model
    self.act_model = act_model
    self.step = act_model.step
    self.value = act_model.value
    self.initial_state = act_model.initial_state

    self.eval_model = eval_model
    self.adr_step = eval_model.step
    self.adr_value = eval_model.value
    self.adr_initial_state = eval_model.initial_state

    self.save = functools.partial(save_variables, sess=sess)
    self.load = functools.partial(load_variables, sess=sess)

    initialize()
    global_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope="")
    if MPI is not None:
        sync_from_root(sess, global_variables, comm=comm)  # pylint: disable=E1101
def _qmc_step_fn(self, optimizer_fn, using_kfac, global_step):
    """Training step for network given the MCMC state.

    Args:
      optimizer_fn: A function which takes as argument a LayerCollection
        object (None) if using_kfac is True (False) and returns the
        optimizer.
      using_kfac: True if optimizer_fn creates an instance of
        kfac.KfacOptimizer and False otherwise.
      global_step: tensorflow op for global step index.

    Returns:
      loss: per-GPU loss tensor with control dependencies for updating
        network.
      local_energy: local energy for each walker.
      features: network output for each walker.

    Raises:
      RuntimeError: If using_kfac is True and optimizer_fn does not create a
        kfac.KfacOptimizer instance or the converse.
    """
    # Note layer_collection cannot be modified after the KFac optimizer has
    # been constructed.
    if using_kfac:
        layer_collection = kfac.LayerCollection()
    else:
        layer_collection = None

    walkers = self.data_gen.walkers_per_gpu
    features, features_sign = self.network(walkers, layer_collection)
    optimizer = optimizer_fn(layer_collection)

    if bool(using_kfac) != isinstance(optimizer, kfac.KfacOptimizer):
        raise RuntimeError(
            'using_kfac is inconsistent with the optimizer created by '
            'optimizer_fn.')

    if layer_collection:
        layer_collection.register_squared_error_loss(features, reuse=False)

    with tf.name_scope('local_energy'):
        kinetic_fn, potential_fn = self.hamiltonian
        kinetic = kinetic_fn(features, walkers)
        potential = potential_fn(walkers)
        local_energy = kinetic + potential
        loss = tf.reduce_mean(local_energy)
        replica_context = tf.distribute.get_replica_context()
        mean_op = tf.distribute.ReduceOp.MEAN
        mean_loss = replica_context.merge_call(
            lambda strategy, val: strategy.reduce(mean_op, val),
            args=(loss,))
        grad_loss = local_energy - mean_loss

        if self._clip_el is not None:
            # clip_el should be much larger than 1, to avoid bias
            median = tfp.stats.percentile(grad_loss, 50.0)
            diff = tf.reduce_mean(tf.abs(grad_loss - median))
            grad_loss_clipped = tf.clip_by_value(
                grad_loss, median - self._clip_el * diff,
                median + self._clip_el * diff)
        else:
            grad_loss_clipped = grad_loss

    with tf.name_scope('step'):
        # Create functions which take no arguments and return the ops for
        # applying an optimisation step.
        if not optimizer:
            optimize_step = tf.no_op
        else:
            optimize_step = functools.partial(
                optimizer.minimize,
                features,
                global_step=global_step,
                var_list=self.network.trainable_variables,
                grad_loss=grad_loss_clipped)

        if self._check_loss:
            # Apply optimisation step only if all local energies are
            # well-defined.
            step = tf.cond(tf.reduce_any(tf.math.is_nan(mean_loss)),
                           tf.no_op, optimize_step)
        else:
            # Faster, but less safe: always apply optimisation step. If the
            # gradients are not well-defined (e.g. loss contains a NaN),
            # then the network will also be set to NaN.
            step = optimize_step()

        # A strategy step function must return tensors, not ops so apply a
        # control dependency to a dummy op to ensure they're executed.
        with tf.control_dependencies([step]):
            loss = tf.identity(loss)

    return {
        'loss': loss,
        'local_energies': local_energy,
        'features': features,
        'features_sign': features_sign
    }
def maybe_gen_fake_data_based_on_real_data(image, label, reso,
                                           min_fake_lesion_ratio,
                                           gen_fake_probability):
    """Remove real lesion and synthesize lesion."""
    # TODO(lehou): Replace magic numbers with flag variables.
    gen_prob_indicator = tf.random_uniform(shape=[],
                                           minval=0.0,
                                           maxval=1.0,
                                           dtype=tf.float32)

    background_mask = tf.less(label, 0.5)
    lesion_mask = tf.greater(label, 1.5)
    liver_mask = tf.logical_not(tf.logical_or(background_mask, lesion_mask))

    liver_intensity = tf.boolean_mask(image, liver_mask)
    lesion_intensity = tf.boolean_mask(image, lesion_mask)

    intensity_diff = tf.reduce_mean(liver_intensity) - (
        tf.reduce_mean(lesion_intensity))
    intensity_diff *= 1.15
    intensity_diff = tf.cond(tf.is_nan(intensity_diff), lambda: 0.0,
                             lambda: intensity_diff)

    lesion_liver_ratio = 0.0
    lesion_liver_ratio += tf.random.normal(shape=[], mean=0.01, stddev=0.01)
    lesion_liver_ratio += tf.random.normal(shape=[], mean=0.0, stddev=0.05)
    lesion_liver_ratio = tf.clip_by_value(lesion_liver_ratio,
                                          min_fake_lesion_ratio,
                                          min_fake_lesion_ratio + 0.20)

    fake_lesion_mask = tf.logical_and(
        _gen_rand_mask(ratio_mean=lesion_liver_ratio,
                       ratio_stddev=0.0,
                       scale=reso // 32,
                       shape=label.shape,
                       smoothness=reso // 32),
        tf.logical_not(background_mask))
    liver_mask = tf.logical_not(
        tf.logical_or(background_mask, fake_lesion_mask))

    # Blur the masks
    lesion_mask_blur = tf.squeeze(
        tf.nn.conv3d(tf.expand_dims(
            tf.expand_dims(tf.cast(lesion_mask, tf.float32), -1), 0),
                     filter=tf.ones([reso // 32] * 3 + [1, 1], tf.float32) /
                     (reso // 32)**3,
                     strides=[1, 1, 1, 1, 1],
                     padding='SAME'))
    fake_lesion_mask_blur = tf.squeeze(
        tf.nn.conv3d(tf.expand_dims(
            tf.expand_dims(tf.cast(fake_lesion_mask, tf.float32), -1), 0),
                     filter=tf.ones([reso // 32] * 3 + [1, 1], tf.float32) /
                     (reso // 32)**3,
                     strides=[1, 1, 1, 1, 1],
                     padding='SAME'))

    # Remove real lesion and add fake lesion.
    # If the intensity difference is too small (maybe no liver or lesion
    # region was labeled), do not generate fake data.
    gen_prob_indicator = tf.cond(tf.greater(intensity_diff, 0.0001),
                                 lambda: gen_prob_indicator, lambda: 0.0)
    # pylint: disable=g-long-lambda
    image = tf.cond(
        tf.greater(gen_prob_indicator, 1 - gen_fake_probability),
        lambda: image + intensity_diff * lesion_mask_blur \
                      - intensity_diff * fake_lesion_mask_blur,
        lambda: image)
    label = tf.cond(
        tf.greater(gen_prob_indicator, 1 - gen_fake_probability),
        lambda: tf.cast(background_mask, tf.float32) * 0 + \
                tf.cast(liver_mask, tf.float32) * 1 + \
                tf.cast(fake_lesion_mask, tf.float32) * 2,
        lambda: label)
    # pylint: enable=g-long-lambda

    return image, label
def label_summary(labels):
    labels = tf.clip_by_value(labels, 0, 3) * int(255 / 3)
    tf.summary.image('label', tf.cast(labels, tf.uint8), 4)
def _truncated_normal(mean, stddev):
    v = tf.random.normal(shape=[], mean=mean, stddev=stddev)
    v = tf.clip_by_value(v, -2 * stddev + mean, 2 * stddev + mean)
    return v
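Note that clipping piles probability mass at mean +/- 2*stddev, whereas tf.random.truncated_normal resamples out-of-range draws instead. A hedged comparison sketch:

import tensorflow as tf  # TF1-style API

# Clipped normal (as above): out-of-range draws stick to the boundaries.
clipped = _truncated_normal(mean=0.0, stddev=1.0)
# Resampled truncated normal: out-of-range draws are redrawn, so the
# density at the boundaries is not inflated.
resampled = tf.random.truncated_normal(shape=[], mean=0.0, stddev=1.0)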
def ProbFromCounts(counts):
    return counts / tf.clip_by_value(
        tf.reduce_sum(counts, axis=1, keepdims=True), 1e-9, 1e9)
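If used together with LogitsFromProb above, this turns raw count rows into logits while guarding both the division and the log against zeros. A hedged usage sketch:

import tensorflow as tf  # TF1-style API

counts = tf.constant([[3.0, 1.0, 0.0],
                      [0.0, 0.0, 0.0]])  # an all-zero row is safe too
prob = ProbFromCounts(counts)    # rows sum to 1 (or 0 for the zero row)
logits = LogitsFromProb(prob)    # zeros clip to 1e-12 before the log
with tf.Session() as sess:
    print(sess.run(logits))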
def __init__(self,
             predict_fn: Union[Callable, tf.keras.Model, 'keras.Model'],
             shape: Tuple[int, ...],
             distance_fn: str = 'l1',
             target_proba: float = 1.0,
             target_class: Union[str, int] = 'other',
             max_iter: int = 1000,
             early_stop: int = 50,
             lam_init: float = 1e-1,
             max_lam_steps: int = 10,
             tol: float = 0.05,
             learning_rate_init=0.1,
             feature_range: Union[Tuple, str] = (-1e10, 1e10),
             eps: Union[float, np.ndarray] = 0.01,  # feature-wise epsilons
             init: str = 'identity',
             decay: bool = True,
             write_dir: str = None,
             debug: bool = False,
             sess: tf.Session = None) -> None:
    """
    Initialize counterfactual explanation method based on Wachter et al. (2017)

    Parameters
    ----------
    predict_fn
        Keras or TensorFlow model or any other model's prediction function
        returning class probabilities
    shape
        Shape of input data starting with batch size
    distance_fn
        Distance function to use in the loss term
    target_proba
        Target probability for the counterfactual to reach
    target_class
        Target class for the counterfactual to reach, one of 'other', 'same'
        or an integer denoting desired class membership for the
        counterfactual instance
    max_iter
        Maximum number of iterations to run the gradient descent for
        (inner loop)
    early_stop
        Number of steps after which to terminate gradient descent if all or
        none of found instances are solutions
    lam_init
        Initial regularization constant for the prediction part of the
        Wachter loss
    max_lam_steps
        Maximum number of times to adjust the regularization constant
        (outer loop) before terminating the search
    tol
        Tolerance for the counterfactual target probability
    learning_rate_init
        Initial learning rate for each outer loop of lambda
    feature_range
        Tuple with min and max ranges to allow for perturbed instances.
        Min and max ranges can be floats or numpy arrays with dimension
        (1 x nb of features) for feature-wise ranges
    eps
        Gradient step sizes used in calculating numerical gradients,
        defaults to a single value for all features, but can be passed an
        array for feature-wise step sizes
    init
        Initialization method for the search of counterfactuals, currently
        must be 'identity'
    decay
        Flag to decay learning rate to zero for each outer loop over lambda
    write_dir
        Directory to write Tensorboard files to
    debug
        Flag to write Tensorboard summaries for debugging
    sess
        Optional Tensorflow session that will be used if passed instead of
        creating or inferring one internally
    """
    super().__init__(meta=copy.deepcopy(DEFAULT_META_CF))
    # get params for storage in meta
    params = locals()
    remove = ['self', 'predict_fn', 'sess', '__class__']
    for key in remove:
        params.pop(key)
    self.meta['params'].update(params)

    self.data_shape = shape
    self.batch_size = shape[0]
    self.target_class = target_class

    # options for the optimizer
    self.max_iter = max_iter
    self.lam_init = lam_init
    self.tol = tol
    self.max_lam_steps = max_lam_steps
    self.early_stop = early_stop

    self.eps = eps
    self.init = init
    self.feature_range = feature_range
    self.target_proba_arr = target_proba * np.ones(self.batch_size)

    self.debug = debug

    # check if the passed object is a model and get session
    is_model, is_keras, model_sess = _check_keras_or_tf(predict_fn)
    self.meta['params'].update(is_model=is_model, is_keras=is_keras)

    # if session provided, use it
    if isinstance(sess, tf.Session):
        self.sess = sess
    else:
        self.sess = model_sess

    if is_model:  # Keras or TF model
        self.model = True
        self.predict_fn = predict_fn.predict  # type: ignore # array function
        self.predict_tn = predict_fn  # tensor function
    else:  # black-box model
        self.predict_fn = predict_fn
        self.predict_tn = None
        self.model = False

    self.n_classes = self.predict_fn(np.zeros(shape)).shape[1]

    # flag to keep track if explainer is fit or not
    self.fitted = False

    # set up graph session for optimization (counterfactual search)
    with tf.variable_scope('cf_search', reuse=tf.AUTO_REUSE):

        # define variables for original and candidate counterfactual
        # instances, target labels and lambda
        self.orig = tf.get_variable('original', shape=shape, dtype=tf.float32)
        self.cf = tf.get_variable(
            'counterfactual',
            shape=shape,
            dtype=tf.float32,
            constraint=lambda x: tf.clip_by_value(x, feature_range[0],
                                                  feature_range[1]))
        # the following will be a 1-hot encoding of the target class
        # (as predicted by the model)
        self.target = tf.get_variable('target',
                                      shape=(self.batch_size,
                                             self.n_classes),
                                      dtype=tf.float32)

        # constant target probability and global step variable
        self.target_proba = tf.constant(target_proba *
                                        np.ones(self.batch_size),
                                        dtype=tf.float32,
                                        name='target_proba')
        self.global_step = tf.Variable(0.0,
                                       trainable=False,
                                       name='global_step')

        # lambda hyperparameter - placeholder instead of variable as
        # annealed in first epoch
        self.lam = tf.placeholder(tf.float32,
                                  shape=(self.batch_size),
                                  name='lam')

        # define placeholders that will be assigned to relevant variables
        self.assign_orig = tf.placeholder(tf.float32, shape,
                                          name='assign_orig')
        self.assign_cf = tf.placeholder(tf.float32, shape, name='assign_cf')
        self.assign_target = tf.placeholder(tf.float32,
                                            shape=(self.batch_size,
                                                   self.n_classes),
                                            name='assign_target')

        # L1 distance and MAD constants
        # TODO: MADs?
        ax_sum = list(np.arange(1, len(self.data_shape)))
        if distance_fn == 'l1':
            self.dist = tf.reduce_sum(tf.abs(self.cf - self.orig),
                                      axis=ax_sum,
                                      name='l1')
        else:
            logger.exception('Distance metric %s not supported', distance_fn)
            raise ValueError

        # distance loss
        self.loss_dist = self.lam * self.dist

        # prediction loss
        if not self.model:
            # will need to calculate gradients numerically
            self.loss_opt = self.loss_dist
        else:
            # autograd gradients throughout
            self.pred_proba = self.predict_tn(self.cf)

            # 3 cases for target_class
            if target_class == 'same':
                self.pred_proba_class = tf.reduce_max(
                    self.target * self.pred_proba, 1)
            elif target_class == 'other':
                self.pred_proba_class = tf.reduce_max(
                    (1 - self.target) * self.pred_proba, 1)
            elif target_class in range(self.n_classes):
                # if class is specified, this is known in advance
                self.pred_proba_class = tf.reduce_max(
                    tf.one_hot(target_class, self.n_classes,
                               dtype=tf.float32) * self.pred_proba, 1)
            else:
                logger.exception('Target class %s unknown', target_class)
                raise ValueError

            self.loss_pred = tf.square(self.pred_proba_class -
                                       self.target_proba)

            self.loss_opt = self.loss_pred + self.loss_dist

        # optimizer
        if decay:
            self.learning_rate = tf.train.polynomial_decay(
                learning_rate_init,
                self.global_step,
                self.max_iter,
                0.0,
                power=1.0)
        else:
            self.learning_rate = tf.convert_to_tensor(learning_rate_init)

        # TODO optional argument to change type, learning rate scheduler
        opt = tf.train.AdamOptimizer(self.learning_rate)

        # first compute gradients, then apply them
        self.compute_grads = opt.compute_gradients(self.loss_opt,
                                                   var_list=[self.cf])
        self.grad_ph = tf.placeholder(shape=shape,
                                      dtype=tf.float32,
                                      name='grad_cf')
        grad_and_var = [(self.grad_ph, self.cf)]
        self.apply_grads = opt.apply_gradients(grad_and_var,
                                               global_step=self.global_step)

    # variables to initialize
    self.setup = []  # type: list
    self.setup.append(self.orig.assign(self.assign_orig))
    self.setup.append(self.cf.assign(self.assign_cf))
    self.setup.append(self.target.assign(self.assign_target))

    self.tf_init = tf.variables_initializer(
        var_list=tf.global_variables(scope='cf_search'))

    # tensorboard
    if write_dir is not None:
        self.writer = tf.summary.FileWriter(write_dir,
                                            tf.get_default_graph())
        self.writer.add_graph(tf.get_default_graph())

    # return templates
    self.instance_dict = dict.fromkeys(
        ['X', 'distance', 'lambda', 'index', 'class', 'proba', 'loss'])
    self.return_dict = copy.deepcopy(DEFAULT_DATA_CF)
    self.return_dict['all'] = {i: [] for i in range(self.max_lam_steps)}
def model_fn(features, labels, mode, params, config): """Builds the model function for use in an estimator. Arguments: features: The input features for the estimator. labels: The labels, unused here. mode: Signifies whether it is train or test or predict. params: Some parameters, unused here. config: The RunConfig, unused here. Returns: EstimatorSpec: A tf.estimator.EstimatorSpec instance. """ del labels, params, config if FLAGS.analytic_kl and FLAGS.mixture_components != 1: raise NotImplementedError( "Using `analytic_kl` is only supported when `mixture_components = 1` " "since there's no closed form otherwise.") if FLAGS.floating_prior and not (FLAGS.unit_posterior and FLAGS.mixture_components == 1): raise NotImplementedError( "Using `floating_prior` is only supported when `unit_posterior` = True " "since there's a scale ambiguity otherwise, and when " "`mixture_components = 1` since there's no closed form otherwise.") if FLAGS.fitted_samples and FLAGS.mixture_components != 1: raise NotImplementedError( "Using `fitted_samples` is only supported when " "`mixture_components = 1` since there's no closed form otherwise.") if FLAGS.bilbo and not FLAGS.floating_prior: raise NotImplementedError( "Using `bilbo` is only supported when `floating_prior = True`.") activation = tf.nn.leaky_relu encoder = make_encoder(activation, FLAGS.latent_size, FLAGS.base_depth) decoder = make_decoder(activation, FLAGS.latent_size, [IMAGE_SIZE] * 2 + [3], FLAGS.base_depth) approx_posterior = encoder(features) approx_posterior_sample = approx_posterior.sample(FLAGS.n_samples) decoder_mu = decoder(approx_posterior_sample) if FLAGS.floating_prior or FLAGS.fitted_samples: posterior_batch_mean = tf.reduce_mean(approx_posterior.mean()**2, [0]) posterior_batch_variance = tf.reduce_mean(approx_posterior.stddev()**2, [0]) posterior_scale = posterior_batch_mean + posterior_batch_variance floating_prior = tfd.MultivariateNormalDiag( tf.zeros(FLAGS.latent_size), tf.sqrt(posterior_scale)) tf.summary.scalar("posterior_scale", tf.reduce_sum(posterior_scale)) if FLAGS.floating_prior: latent_prior = floating_prior else: latent_prior = make_mixture_prior(FLAGS.latent_size, FLAGS.mixture_components) # Decode samples from the prior for visualization. if FLAGS.fitted_samples: sample_distribution = floating_prior else: sample_distribution = latent_prior n_samples = VIZ_GRID_SIZE**2 random_mu = decoder(sample_distribution.sample(n_samples)) residual = tf.reshape(features - decoder_mu, [-1] + [IMAGE_SIZE] * 2 + [3]) if FLAGS.use_students_t: nll = adaptive.image_lossfun( residual, color_space=FLAGS.color_space, representation=FLAGS.representation, wavelet_num_levels=FLAGS.wavelet_num_levels, wavelet_scale_base=FLAGS.wavelet_scale_base, use_students_t=FLAGS.use_students_t, scale_lo=FLAGS.scale_lo, scale_init=FLAGS.scale_init)[0] else: nll = adaptive.image_lossfun( residual, color_space=FLAGS.color_space, representation=FLAGS.representation, wavelet_num_levels=FLAGS.wavelet_num_levels, wavelet_scale_base=FLAGS.wavelet_scale_base, use_students_t=FLAGS.use_students_t, alpha_lo=FLAGS.alpha_lo, alpha_hi=FLAGS.alpha_hi, alpha_init=FLAGS.alpha_init, scale_lo=FLAGS.scale_lo, scale_init=FLAGS.scale_init)[0] nll = tf.reshape(nll, [tf.shape(decoder_mu)[0], tf.shape(decoder_mu)[1]] + [IMAGE_SIZE] * 2 + [3]) # Clipping to prevent the loss from nanning out. 
    max_val = np.finfo(np.float32).max
    nll = tf.clip_by_value(nll, -max_val, max_val)

    viz_n_inputs = np.int32(np.minimum(VIZ_MAX_N_INPUTS, FLAGS.batch_size))
    viz_n_samples = np.int32(np.minimum(VIZ_MAX_N_SAMPLES, FLAGS.n_samples))

    image_tile_summary("input", tf.to_float(features), rows=1, cols=viz_n_inputs)
    image_tile_summary(
        "recon/mean",
        decoder_mu[:viz_n_samples, :viz_n_inputs],
        rows=viz_n_samples,
        cols=viz_n_inputs)
    img_summary_input = image_tile_summary(
        "input1", tf.to_float(features), rows=viz_n_inputs, cols=1)
    img_summary_recon = image_tile_summary(
        "recon1", decoder_mu[:1, :viz_n_inputs], rows=viz_n_inputs, cols=1)
    image_tile_summary(
        "random/mean", random_mu, rows=VIZ_GRID_SIZE, cols=VIZ_GRID_SIZE)

    distortion = tf.reduce_sum(nll, axis=[2, 3, 4])
    avg_distortion = tf.reduce_mean(distortion)
    tf.summary.scalar("distortion", avg_distortion)

    if FLAGS.analytic_kl:
        rate = tfd.kl_divergence(approx_posterior, latent_prior)
    else:
        rate = (approx_posterior.log_prob(approx_posterior_sample) -
                latent_prior.log_prob(approx_posterior_sample))
    avg_rate = tf.reduce_mean(rate)
    tf.summary.scalar("rate", avg_rate)

    elbo_local = -(rate + distortion)
    elbo = tf.reduce_mean(elbo_local)
    tf.summary.scalar("elbo", elbo)

    if FLAGS.bilbo:
        bilbo = -0.5 * tf.reduce_sum(
            tf.log1p(posterior_batch_mean /
                     posterior_batch_variance)) - avg_distortion
        tf.summary.scalar("bilbo", bilbo)
        loss = -bilbo
    else:
        loss = -elbo

    importance_weighted_elbo = tf.reduce_mean(
        tf.reduce_logsumexp(elbo_local, axis=0) -
        tf.math.log(tf.to_float(FLAGS.n_samples)))
    tf.summary.scalar("elbo/importance_weighted", importance_weighted_elbo)

    # Perform variational inference by minimizing the -ELBO.
    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.train.cosine_decay(
        FLAGS.learning_rate,
        tf.maximum(
            tf.cast(0, tf.int64),
            global_step - int(FLAGS.decay_start * FLAGS.max_steps)),
        int((1. - FLAGS.decay_start) * FLAGS.max_steps))
    tf.summary.scalar("learning_rate", learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimizer.minimize(loss, global_step=global_step)
    else:
        train_op = None

    eval_metric_ops = {}
    eval_metric_ops["elbo"] = tf.metrics.mean(elbo)
    eval_metric_ops["elbo/importance_weighted"] = tf.metrics.mean(
        importance_weighted_elbo)
    eval_metric_ops["rate"] = tf.metrics.mean(avg_rate)
    eval_metric_ops["distortion"] = tf.metrics.mean(avg_distortion)
    # This ugly hackery is necessary to get TF to visualize when running the
    # eval set, apparently.
    eval_metric_ops["img_summary_input"] = (img_summary_input, tf.no_op())
    eval_metric_ops["img_summary_recon"] = (img_summary_recon, tf.no_op())
    eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}

    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
    )
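# --- Illustration (not part of the original): the importance-weighted bound
# computed in model_fn, in isolation. A minimal sketch assuming `elbo_local`
# has shape [n_samples, batch_size] as above; averaging K per-sample ELBOs
# inside a log-sum-exp yields a tighter bound than the plain ELBO:
def importance_weighted_elbo_sketch(elbo_local, n_samples):
    # log(1/K * sum_k exp(elbo_k)) = logsumexp_k(elbo_k) - log(K)
    return tf.reduce_mean(
        tf.reduce_logsumexp(elbo_local, axis=0) -
        tf.math.log(tf.to_float(n_samples)))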
def _ensure_eos(k, v):
    if k not in feature_keys:
        return v
    return tf.concat([v[0:-1], tf.clip_by_value(v[-1:], 0, 1)], axis=0)
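# --- Illustration (not part of the original): what _ensure_eos does, as a
# hedged sketch with hypothetical values (assuming 'targets' is a member of
# the enclosing `feature_keys`). The last position is clipped into {0, 1}, so
# a truncated sequence still ends in EOS (id 1) while padding (id 0) is kept:
#
#   _ensure_eos('targets', tf.constant([17, 42, 99]))    # -> [17, 42, 1]
#   _ensure_eos('targets', tf.constant([17, 42, 0]))     # -> [17, 42, 0]
#   _ensure_eos('other_key', tf.constant([17, 42, 99]))  # unchanged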
def compress(args):
    """Compresses an image, or a batch of images of the same shape in npy format."""
    from configs import get_eval_batch_size

    if args.input_file.endswith('.npy'):
        # The .npy file should contain N images of the same shape, in the form
        # of an array of shape [N, H, W, 3].
        X = np.load(args.input_file)
    else:
        # Load input image and add batch dimension.
        from PIL import Image
        x = np.asarray(Image.open(args.input_file).convert('RGB'))
        X = x[None, ...]

    num_images = int(X.shape[0])
    num_pixels = int(np.prod(X.shape[1:-1]))
    X = X.astype('float32')
    X /= 255.

    eval_batch_size = get_eval_batch_size(num_pixels)
    dataset = tf.data.Dataset.from_tensor_slices(X)
    dataset = dataset.batch(batch_size=eval_batch_size)
    # https://www.tensorflow.org/api_docs/python/tf/compat/v1/data/Iterator
    # Importantly, each sess.run(op) call consumes a new batch, where op is any
    # operation that depends on x. Therefore, if multiple ops need to be
    # evaluated on the same batch of data, they have to be grouped like
    # sess.run([op1, op2, ...]).
    x = dataset.make_one_shot_iterator().get_next()

    graph = build_graph(args, x, training=False)
    y_likelihoods, z_likelihoods, x_tilde = (graph['y_likelihoods'],
                                             graph['z_likelihoods'],
                                             graph['x_tilde'])
    log_q_z_tilde = graph['log_q_z_tilde']

    # Total number of bits divided by number of pixels.
    axes_except_batch = list(range(1, len(x.shape)))  # should be [1, 2, 3]
    bpp_back = tf.reduce_sum(
        -log_q_z_tilde, axis=axes_except_batch) / (np.log(2) * num_pixels)
    y_bpp = tf.reduce_sum(
        -tf.log(y_likelihoods), axis=axes_except_batch) / (np.log(2) * num_pixels)
    z_bpp = tf.reduce_sum(
        -tf.log(z_likelihoods), axis=axes_except_batch) / (np.log(2) * num_pixels)
    eval_bpp = y_bpp + z_bpp - bpp_back  # shape (N,)

    # Bring both images back to 0..255 range.
    x *= 255
    x_tilde = tf.clip_by_value(x_tilde, 0, 1)
    x_tilde = tf.round(x_tilde * 255)

    mse = tf.reduce_mean(
        tf.squared_difference(x, x_tilde), axis=axes_except_batch)  # shape (N,)
    psnr = tf.image.psnr(x_tilde, x, 255)  # shape (N,)
    msssim = tf.image.ssim_multiscale(x_tilde, x, 255)  # shape (N,)
    msssim_db = -10 * tf.log(1 - msssim) / np.log(10)  # shape (N,)

    with tf.Session() as sess:
        # Load the latest model checkpoint, get compression stats.
        save_dir = os.path.join(args.checkpoint_dir, args.runname)
        latest = tf.train.latest_checkpoint(checkpoint_dir=save_dir)
        tf.train.Saver().restore(sess, save_path=latest)
        eval_fields = [
            'mse', 'psnr', 'msssim', 'msssim_db', 'est_bpp', 'est_y_bpp',
            'est_z_bpp', 'est_bpp_back'
        ]
        eval_tensors = [
            mse, psnr, msssim, msssim_db, eval_bpp, y_bpp, z_bpp, bpp_back
        ]
        all_results_arrs = {key: [] for key in eval_fields}  # append across all batches
        while True:
            try:
                # If requested, transform the quantized image back and measure performance.
                eval_arrs = sess.run(eval_tensors)
                for field, arr in zip(eval_fields, eval_arrs):
                    all_results_arrs[field] += arr.tolist()
            except tf.errors.OutOfRangeError:
                break

        for field in eval_fields:
            all_results_arrs[field] = np.asarray(all_results_arrs[field])

        input_file = os.path.basename(args.input_file)
        results_dict = all_results_arrs
        trained_script_name = args.runname.split('-')[0]
        # Current script name, without extension.
        script_name = os.path.splitext(os.path.basename(__file__))[0]
        save_file = 'rd-%s-input=%s.npz' % (args.runname, input_file)
        if script_name != trained_script_name:
            save_file = 'rd-%s+%s-input=%s.npz' % (script_name, args.runname,
                                                   input_file)
        np.savez(os.path.join(args.results_dir, save_file), **results_dict)

        for field in eval_fields:
            arr = all_results_arrs[field]
            print('Avg {}: {:0.4f}'.format(field, arr.mean()))
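# --- Illustration (not part of the original): the rate estimate used above,
# in isolation. A minimal sketch assuming `likelihoods` holds per-symbol
# probabilities from an entropy model: the negative log-likelihood (in nats)
# converted to bits and divided by the pixel count gives bits per pixel:
def estimated_bpp_sketch(likelihoods, num_pixels):
    # -log2(p) summed over all symbols = total information content in bits.
    total_bits = tf.reduce_sum(-tf.log(likelihoods)) / np.log(2)
    return total_bits / num_pixels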
def main(_):
    # Load the 3DMM basis.
    basis3dmm = load_3dmm_basis(
        FLAGS.basis3dmm_path,
        FLAGS.uv_path,
        is_whole_uv=True,
    )

    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Build graph.
    front_image_batch = tf.compat.v1.placeholder(
        dtype=tf.float32, shape=[1, None, None, 3], name="front_image")
    front_image_batch_resized = tf.image.resize(front_image_batch,
                                                (FLAGS.uv_size, FLAGS.uv_size))
    front_seg_batch = tf.compat.v1.placeholder(
        dtype=tf.float32, shape=[1, None, None, 19], name="front_seg")
    front_proj_xyz_batch = tf.compat.v1.placeholder(
        dtype=tf.float32,
        shape=[1, basis3dmm["basis_shape"].shape[1] // 3, 3],
        name="front_proj_xyz",
    )
    front_ver_norm_batch = tf.compat.v1.placeholder(
        dtype=tf.float32,
        shape=[1, basis3dmm["basis_shape"].shape[1] // 3, 3],
        name="front_ver_norm",
    )

    base_uv_path = "../resources/base_tex.png"
    base_uv = Image.open(base_uv_path).resize((FLAGS.uv_size, FLAGS.uv_size))
    base_uv = np.asarray(base_uv, np.float32) / 255
    base_uv_batch = tf.constant(base_uv[np.newaxis, ...], name="base_uv")

    if FLAGS.is_mult_view:
        left_image_batch = tf.compat.v1.placeholder(
            dtype=tf.float32, shape=[1, None, None, 3], name="left_image")
        left_image_batch_resized = tf.image.resize(
            left_image_batch, (FLAGS.uv_size, FLAGS.uv_size))
        left_seg_batch = tf.compat.v1.placeholder(
            dtype=tf.float32, shape=[1, None, None, 19], name="left_seg")
        left_proj_xyz_batch = tf.compat.v1.placeholder(
            dtype=tf.float32,
            shape=[1, basis3dmm["basis_shape"].shape[1] // 3, 3],
            name="left_proj_xyz",
        )
        left_ver_norm_batch = tf.compat.v1.placeholder(
            dtype=tf.float32,
            shape=[1, basis3dmm["basis_shape"].shape[1] // 3, 3],
            name="left_ver_norm",
        )

        right_image_batch = tf.compat.v1.placeholder(
            dtype=tf.float32, shape=[1, None, None, 3], name="right_image")
        right_image_batch_resized = tf.image.resize(
            right_image_batch, (FLAGS.uv_size, FLAGS.uv_size))
        right_seg_batch = tf.compat.v1.placeholder(
            dtype=tf.float32, shape=[1, None, None, 19], name="right_seg")
        right_proj_xyz_batch = tf.compat.v1.placeholder(
            dtype=tf.float32,
            shape=[1, basis3dmm["basis_shape"].shape[1] // 3, 3],
            name="right_proj_xyz",
        )
        right_ver_norm_batch = tf.compat.v1.placeholder(
            dtype=tf.float32,
            shape=[1, basis3dmm["basis_shape"].shape[1] // 3, 3],
            name="right_ver_norm",
        )

        # Read fixed blending masks for multi-view fusion.
        front_mask_path = "../resources/mid_blend_mask.png"
        left_mask_path = "../resources/left_blend_mask.png"
        right_mask_path = "../resources/right_blend_mask.png"
        front_mask = np.asarray(
            Image.open(front_mask_path).resize((FLAGS.uv_size, FLAGS.uv_size)),
            np.float32,
        ) / 255
        left_mask = np.asarray(
            Image.open(left_mask_path).resize((FLAGS.uv_size, FLAGS.uv_size)),
            np.float32,
        ) / 255
        right_mask = np.asarray(
            Image.open(right_mask_path).resize((FLAGS.uv_size, FLAGS.uv_size)),
            np.float32,
        ) / 255
        mask_front_batch = tf.constant(
            front_mask[np.newaxis, ..., np.newaxis], tf.float32, name="mask_front")
        mask_left_batch = tf.constant(
            left_mask[np.newaxis, ..., np.newaxis], tf.float32, name="mask_left")
        mask_right_batch = tf.constant(
            right_mask[np.newaxis, ..., np.newaxis], tf.float32, name="mask_right")

    front_uv_batch, front_uv_mask_batch = unwrap_utils.unwrap_img_into_uv(
        front_image_batch_resized / 255.0,
        front_proj_xyz_batch * FLAGS.uv_size / 300,
        front_ver_norm_batch,
        basis3dmm,
        FLAGS.uv_size,
    )
    front_uv_seg_batch, _ = unwrap_utils.unwrap_img_into_uv(
        front_seg_batch,
        front_proj_xyz_batch,
        front_ver_norm_batch,
        basis3dmm,
        FLAGS.uv_size,
    )

    if FLAGS.is_mult_view:
        left_uv_batch, left_uv_mask_batch = unwrap_utils.unwrap_img_into_uv(
            left_image_batch_resized / 255.0,
            left_proj_xyz_batch * FLAGS.uv_size / 300,
            left_ver_norm_batch,
            basis3dmm,
            FLAGS.uv_size,
        )
        left_uv_seg_batch, _ = unwrap_utils.unwrap_img_into_uv(
            left_seg_batch,
            left_proj_xyz_batch,
            left_ver_norm_batch,
            basis3dmm,
            FLAGS.uv_size,
        )
        right_uv_batch, right_uv_mask_batch = unwrap_utils.unwrap_img_into_uv(
            right_image_batch_resized / 255.0,
            right_proj_xyz_batch * FLAGS.uv_size / 300,
            right_ver_norm_batch,
            basis3dmm,
            FLAGS.uv_size,
        )
        right_uv_seg_batch, _ = unwrap_utils.unwrap_img_into_uv(
            right_seg_batch,
            right_proj_xyz_batch,
            right_ver_norm_batch,
            basis3dmm,
            FLAGS.uv_size,
        )

        # Blend the three views into a single UV texture.
        left_uv_seg_mask_batch = unwrap_utils.get_mask_from_seg(left_uv_seg_batch)
        right_uv_seg_mask_batch = unwrap_utils.get_mask_from_seg(right_uv_seg_batch)
        front_uv_seg_mask_batch = unwrap_utils.get_mask_from_seg(front_uv_seg_batch)

        cur_seg = tf_blend_uv(
            left_uv_seg_mask_batch,
            right_uv_seg_mask_batch,
            mask_right_batch,
            match_color=False,
        )
        uv_seg_mask_batch = tf_blend_uv(
            cur_seg, front_uv_seg_mask_batch, mask_front_batch, match_color=False)

        mask_batch = tf.clip_by_value(
            mask_front_batch + mask_left_batch + mask_right_batch, 0, 1)
        uv_mask_batch = mask_batch * uv_seg_mask_batch

        cur_uv = tf_blend_uv(
            left_uv_batch, right_uv_batch, mask_right_batch, match_color=False)
        cur_uv = tf_blend_uv(
            cur_uv, front_uv_batch, mask_front_batch, match_color=False)
        uv_batch = tf_blend_uv(
            base_uv_batch, cur_uv, uv_mask_batch, match_color=True)
    else:
        uv_seg_mask_batch = unwrap_utils.get_mask_from_seg(front_uv_seg_batch)
        uv_mask_batch = front_uv_mask_batch * uv_seg_mask_batch
        uv_batch = tf_blend_uv(
            base_uv_batch, front_uv_batch, uv_mask_batch, match_color=True)

    uv_batch = tf.identity(uv_batch, name="uv_tex")
    uv_seg_mask_batch = tf.identity(uv_seg_mask_batch, name="uv_seg")
    uv_mask_batch = tf.identity(uv_mask_batch, name="uv_mask")

    init_op = tf.compat.v1.global_variables_initializer()
    sess = tf.compat.v1.Session()
    if FLAGS.write_graph:
        tf.io.write_graph(sess.graph_def, "", FLAGS.pb_path, as_text=True)
        exit()

    # Load data:
    #   seg: [300, 300, 19], segmentation
    #   diffuse: [300, 300, 3], diffuse images
    #   proj_xyz: [N, 3]
    #   ver_norm: [N, 3]
    info_paths = glob.glob(os.path.join(FLAGS.input_dir, "*texture.mat"))
    for info_path in info_paths:
        info = scipy.io.loadmat(info_path)
        if FLAGS.is_mult_view:
            assert info["proj_xyz"].shape[0] >= 3  # front, left, right
            if FLAGS.is_orig_img:
                front_img = info["ori_img"][0][np.newaxis, ...]
                left_img = info["ori_img"][1][np.newaxis, ...]
                right_img = info["ori_img"][2][np.newaxis, ...]
            else:
                front_img = info["diffuse"][0][np.newaxis, ...]
                left_img = info["diffuse"][1][np.newaxis, ...]
                right_img = info["diffuse"][2][np.newaxis, ...]
            uv_tex_res, uv_mask_res = sess.run(
                [uv_batch, uv_mask_batch],
                {
                    front_image_batch: front_img,
                    front_proj_xyz_batch: info["proj_xyz"][0:1, ...],
                    front_ver_norm_batch: info["ver_norm"][0:1, ...],
                    front_seg_batch: info["seg"][0:1, ...],
                    left_image_batch: left_img,
                    left_proj_xyz_batch: info["proj_xyz"][1:2, ...],
                    left_ver_norm_batch: info["ver_norm"][1:2, ...],
                    left_seg_batch: info["seg"][1:2, ...],
                    right_image_batch: right_img,
                    right_proj_xyz_batch: info["proj_xyz"][2:3, ...],
                    right_ver_norm_batch: info["ver_norm"][2:3, ...],
                    right_seg_batch: info["seg"][2:3, ...],
                },
            )
        else:
            print(info["proj_xyz"].shape[0])
            assert info["proj_xyz"].shape[0] >= 1
            if FLAGS.is_orig_img:
                front_img = info["ori_img"][0][np.newaxis, ...]
            else:
                front_img = info["diffuse"][0][np.newaxis, ...]
            uv_tex_res, uv_mask_res = sess.run(
                [uv_batch, uv_mask_batch],
                {
                    front_image_batch: front_img,
                    front_proj_xyz_batch: info["proj_xyz"][0:1, ...],
                    front_ver_norm_batch: info["ver_norm"][0:1, ...],
                    front_seg_batch: info["seg"][0:1, ...],
                },
            )

        uv_tex_res = uv_tex_res[0]
        uv_mask_res = uv_mask_res[0]

        prefix = info_path.split("/")[-1].split(".")[0]
        uv_tex_res = uv_tex_res * 255
        uv_mask_res = uv_mask_res * 255
        Image.fromarray(uv_tex_res.astype(np.uint8)).save(
            os.path.join(FLAGS.output_dir, prefix + "_tex.png"))
        Image.fromarray(np.squeeze(uv_mask_res).astype(np.uint8)).save(
            os.path.join(FLAGS.output_dir, prefix + "_mask.png"))

    sess.close()
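# --- Illustration (not part of the original): a minimal sketch of the
# per-pixel, mask-weighted blending that a tf_blend_uv-style operation
# performs (its match_color step is omitted here). Assumes both textures and
# the mask share spatial shape and the mask values lie in [0, 1]:
def blend_uv_sketch(base_uv, new_uv, mask):
    # Convex combination: mask == 1 keeps new_uv, mask == 0 keeps base_uv.
    return base_uv * (1.0 - mask) + new_uv * mask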
def run_box_to_gaussian(logdir, verbose=False):
    """Run a box-blur-to-Gaussian-blur demonstration.

    See the summary description for more details.

    Arguments:
      logdir: Directory into which to write event logs.
      verbose: Boolean; whether to log any output.
    """
    if verbose:
        logger.info("--- Starting run: box_to_gaussian")

    tf.reset_default_graph()
    tf.set_random_seed(0)

    image = get_image(verbose=verbose)
    blur_radius = tf.placeholder(shape=(), dtype=tf.int32)
    with tf.name_scope("filter"):
        blur_side_length = blur_radius * 2 + 1
        pixel_filter = tf.ones((blur_side_length, blur_side_length))
        pixel_filter = (
            pixel_filter / tf.cast(tf.size(input=pixel_filter), tf.float32))  # normalize

    iterations = 4
    images = [tf.cast(image, tf.float32) / 255.0]
    for _ in xrange(iterations):
        images.append(convolve(images[-1], pixel_filter))
    with tf.name_scope("convert_to_uint8"):
        images = tf.stack([
            tf.cast(255 * tf.clip_by_value(image_, 0.0, 1.0), tf.uint8)
            for image_ in images
        ])

    summ = image_summary.op(
        "box_to_gaussian",
        images,
        max_outputs=iterations,
        display_name="Gaussian blur as a limit process of box blurs",
        description=(
            "Demonstration of forming a Gaussian blur by "
            "composing box blurs, each of which can be expressed "
            "as a 2D convolution.\n\n"
            "A Gaussian blur is formed by convolving a Gaussian "
            "kernel over an image. But a Gaussian kernel is "
            "itself the limit of convolving a constant kernel "
            "with itself many times. Thus, while applying "
            "a box-filter convolution just once produces "
            "results that are noticeably different from those "
            "of a Gaussian blur, repeating the same convolution "
            "just a few times causes the result to rapidly "
            "converge to an actual Gaussian blur.\n\n"
            "Here, the step value controls the blur radius, "
            "and the image sample controls the number of times "
            "that the convolution is applied (plus one). "
            "So, when *sample*=1, the original image is shown; "
            "*sample*=2 shows a box blur; and a hypothetical "
            "*sample*=∞ would show a true Gaussian blur.\n\n"
            "This is one ingredient in a recipe to compute very "
            "fast Gaussian blurs. The other pieces require "
            "special treatment for the box blurs themselves "
            "(decomposition to dual one-dimensional box blurs, "
            "each of which is computed with a sliding window); "
            "we don’t perform those optimizations here.\n\n"
            "[Here are some slides describing the full process.]"
            "(%s)\n\n"
            "%s" % (
                "http://elynxsdk.free.fr/ext-docs/Blur/Fast_box_blur.pdf",
                IMAGE_CREDIT,
            )),
    )

    with tf.Session() as sess:
        sess.run(image.initializer)
        writer = tf.summary.FileWriter(os.path.join(logdir, "box_to_gaussian"))
        writer.add_graph(sess.graph)
        for step in xrange(8):
            if verbose:
                logger.info("--- box_to_gaussian: step: %s" % step)
            feed_dict = {blur_radius: step}
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = config_pb2.RunMetadata()
            s = sess.run(
                summ,
                feed_dict=feed_dict,
                options=run_options,
                run_metadata=run_metadata,
            )
            writer.add_summary(s, global_step=step)
            writer.add_run_metadata(run_metadata, "step_%04d" % step)
        writer.close()
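# --- Illustration (not part of the original): a NumPy sketch of the limit
# process the demo above visualizes. By the central limit theorem, repeatedly
# convolving a box kernel with itself converges to a Gaussian, which is why a
# few stacked box blurs approximate a Gaussian blur:
def box_to_gaussian_sketch(width=5, repeats=3):
    # Self-convolve a 1-D box kernel a few times.
    box = np.ones(width) / float(width)
    kernel = box
    for _ in range(repeats):
        kernel = np.convolve(kernel, box)  # support widens, shape becomes bell-like
    return kernel  # close to a Gaussian of matching variance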
def lerp_clip(a, b, t):
    with tf.name_scope('LerpClip'):
        return a + (b - a) * tf.clip_by_value(t, 0.0, 1.0)
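# --- Illustration (not part of the original): lerp_clip in use. Because t is
# clamped to [0, 1], the interpolation never overshoots either endpoint:
#
#   lerp_clip(0.0, 10.0, 0.25)  # tensor evaluating to 2.5
#   lerp_clip(0.0, 10.0, 1.7)   # tensor evaluating to 10.0 (t clamped to 1.0)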