def add_dyprune(weights):
    """Mask ``weights`` with a pruning mask that is refreshed on every evaluation.

    A non-trainable mask variable, initialised to ones, is created next to
    ``weights``. Each time the returned tensor is evaluated the mask is
    recomputed from the magnitude statistics of the currently unmasked
    weights, stored back into the variable and multiplied into ``weights``.

    Args:
        weights: weight tensor/variable to prune. Its ``name`` without the
            last two characters is used as the key into ``config.crate`` and
            as the prefix of the mask variable name.

    Returns:
        Tensor ``weights * mask`` where ``mask`` is the freshly assigned mask.
    """
    # Strip the last two characters of the name (presumably the ':0' suffix).
    layer_name = weights.name[:-2]
    crate = config.crate[layer_name]  # hyper-parameter: C rate
    prune_mask = tf.Variable(tf.ones_like(weights),
                             name=layer_name + 'mask',
                             trainable=False)

    # Mean and standard deviation of |weights| over the entries kept by the mask.
    active_count = tf.reduce_sum(prune_mask)
    mean = tf.divide(
        tf.reduce_sum(tf.multiply(tf.abs(weights), prune_mask)), active_count)
    sum_of_squares = tf.reduce_sum(tf.square(tf.multiply(weights, prune_mask)))
    std = tf.sqrt(
        tf.divide(sum_of_squares - tf.square(mean) * active_count, active_count))

    threshold_base = mean + std * crate
    t1_lower = threshold_base * 0.25  # hyper-parameter a
    t1_upper = threshold_base * 0.45  # hyper-parameter b

    abs_weights = tf.abs(weights)
    above_lower = tf.greater_equal(abs_weights, tf.ones_like(weights) * t1_lower)
    above_upper = tf.greater_equal(abs_weights, tf.ones_like(weights) * t1_upper)

    # NOTE(review): for a 0/1 mask ``mask >= 0`` is always true, so the result
    # reduces to ``above_lower OR above_upper``. Confirm whether ``mask > 0``
    # was intended so that pruned weights only return above the upper bound.
    keep = tf.greater_equal(prune_mask, tf.zeros_like(weights))
    keep = tf.logical_and(keep, above_lower)
    keep = tf.logical_or(keep, above_upper)

    # The assign op used to be created but never consumed, so the mask stayed
    # all-ones. Multiplying by the assign output forces the update to run and
    # guarantees the product sees the new mask values.
    update = prune_mask.assign(tf.to_float(keep))
    return tf.multiply(weights, update)
def compute_nats_and_bits_per_dim(data_dim, latent_dim, average_reconstruction,
                                  average_prior):
    """Return the negative ELBO per data dimension, in nats and in bits.

    The negative ELBO is an upper bound on the negative likelihood.

    Args:
      data_dim: int-like data dimensionality.
      latent_dim: int-like latent dimensionality.
      average_reconstruction: Scalar Tensor, reconstruction cost averaged over
        all data dimensions and any data batches.
      average_prior: Scalar Tensor, negative log-prior probability averaged
        over all latent dimensions and any data batches.

    Returns:
      Tuple of scalar Tensors: nats per data dimension (e.g., subpixels) and
      bits per data dimension.
    """
    with tf.name_scope(None, default_name="compute_nats_per_dim"):
        data_size = tf.cast(data_dim, average_reconstruction.dtype)
        latent_size = tf.cast(latent_dim, average_prior.dtype)

        # Undo the per-dimension averaging to get the summed terms.
        reconstruction_total = data_size * average_reconstruction
        prior_total = latent_size * average_prior
        negative_elbo = reconstruction_total + prior_total

        nats_per_dim = tf.divide(negative_elbo, data_size, name="nats_per_dim")
        ln_two = tf.log(2.)  # nats -> bits conversion factor
        bits_per_dim = tf.divide(nats_per_dim, ln_two, name="bits_per_dim")
        return nats_per_dim, bits_per_dim
def init_training_graph(self):
    """Create loss, metric and prediction ops, then initialise all variables.

    Reads ``self.last`` (logits) and ``self.train_labels_node`` (labels) and
    sets ``predictions``, ``loss``, ``accuracy``, ``TP``/``TN``/``FP``/``FN``,
    ``precision``, ``recall``, ``F1``, ``MeanAcc``, ``train_prediction`` and
    ``test_prediction`` on ``self``. The confusion counts assume labels and
    predictions take values in {0, 1}. A default session must be active
    because the initializer is run directly.
    """
    with tf.name_scope('Evaluation'):
        logits = self.last
        prob_b = tf.squeeze(logits, squeeze_dims=[1, 2])
        self.predictions = tf.argmax(prob_b, axis=1)

        with tf.name_scope('Loss'):
            labels_int32 = tf.cast(self.train_labels_node, tf.int32)
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=prob_b, labels=labels_int32, name="entropy")
            self.loss = tf.reduce_mean(cross_entropy)
            tf.summary.scalar("entropy", self.loss)

        with tf.name_scope('Accuracy'):
            labels_int64 = tf.cast(self.train_labels_node, tf.int64)
            is_correct = tf.equal(self.predictions, labels_int64)
            self.accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
            tf.summary.scalar("accuracy", self.accuracy)

        with tf.name_scope('Prediction'):
            # A product is non-zero only when both factors are non-zero, so
            # each count below selects one cell of the 2x2 confusion table.
            self.TP = tf.count_nonzero(self.predictions * labels_int64)
            self.TN = tf.count_nonzero(
                (self.predictions - 1) * (labels_int64 - 1))
            self.FP = tf.count_nonzero(self.predictions * (labels_int64 - 1))
            self.FN = tf.count_nonzero((self.predictions - 1) * labels_int64)

        with tf.name_scope('Precision'):
            predicted_positive = tf.add(self.TP, self.FP)
            self.precision = tf.divide(self.TP, predicted_positive)
            tf.summary.scalar('Precision', self.precision)

        with tf.name_scope('Recall'):
            actual_positive = tf.add(self.TP, self.FN)
            self.recall = tf.divide(self.TP, actual_positive)
            tf.summary.scalar('Recall', self.recall)

        with tf.name_scope('F1'):
            num = tf.multiply(self.precision, self.recall)
            dem = tf.add(self.precision, self.recall)
            self.F1 = tf.scalar_mul(2, tf.divide(num, dem))
            tf.summary.scalar('F1', self.F1)

        with tf.name_scope('MeanAccuracy'):
            # TN / (TN + FN), averaged with the positive-class precision.
            negative_precision = tf.divide(self.TN, tf.add(self.TN, self.FN))
            self.MeanAcc = tf.divide(
                tf.add(self.precision, negative_precision), 2)

    # Two separate softmax ops are created on purpose to match the graph.
    self.train_prediction = tf.nn.softmax(logits)
    self.test_prediction = tf.nn.softmax(logits)

    tf.global_variables_initializer().run()
    print('Computational graph initialised')
def add_tensorboard(self, session, tensorboard_dir, tb_run_name=None, timeline_enabled=False):
    """
    Attach the TensorBoard summary operations to the acoustic RNN

    After the call the following attributes are available
        self.train_summaries_op : produces the summary for a training step
        self.test_summaries_op : produces the summary for a test step
        self.summary_writer_op : writer that stores the summaries on disk

    Parameters
    ----------
    :param session: the tensorflow session (its graph is handed to the writer)
    :param tensorboard_dir: path to tensorboard directory
    :param tb_run_name: directory name for the tensorboard files inside tensorboard_dir,
                        if None a name built from the current date and time is used
    :param timeline_enabled: enable the output of a trace file for timeline visualization
    """
    self.tensorboard_dir = tensorboard_dir
    self.timeline_enabled = timeline_enabled

    # Two distinct key objects keep training and test summaries in separate collections
    graphkey_training = tf.GraphKeys()
    graphkey_test = tf.GraphKeys()
    train_only = [graphkey_training]
    test_only = [graphkey_test]

    # Learning rate (reported for both kinds of step)
    tf.summary.scalar('Learning_rate', self.learning_rate_var,
                      collections=[graphkey_training, graphkey_test])

    # Loss
    with tf.name_scope('Mean_loss'):
        mean_loss = tf.divide(self.accumulated_mean_loss, self.mini_batch)
        tf.summary.scalar('Training', mean_loss, collections=train_only)
        tf.summary.scalar('Test', mean_loss, collections=test_only)

    # Accuracy
    with tf.name_scope('Accuracy_-_Error_Rate'):
        mean_error_rate = tf.divide(self.accumulated_error_rate, self.mini_batch)
        tf.summary.scalar('Training', mean_error_rate, collections=train_only)
        tf.summary.scalar('Test', mean_error_rate, collections=test_only)

    # Hidden state: element 0 is logged as the cell state, element 1 as the hidden state
    with tf.name_scope('RNN_internal_state'):
        for idx, state_variable in enumerate(self.rnn_tuple_state):
            for position, label in ((0, 'cell_state'), (1, 'hidden_state')):
                tf.summary.histogram('Training_layer-{0}_{1}'.format(idx, label),
                                     state_variable[position], collections=train_only)
                tf.summary.histogram('Test_layer-{0}_{1}'.format(idx, label),
                                     state_variable[position], collections=test_only)

    self.train_summaries_op = tf.summary.merge_all(key=graphkey_training)
    self.test_summaries_op = tf.summary.merge_all(key=graphkey_test)

    run_name = tb_run_name
    if run_name is None:
        run_name = datetime.now().strftime('%Y-%m-%d--%H-%M-%S')
    self.summary_writer_op = tf.summary.FileWriter(tensorboard_dir + '/' + run_name + '/',
                                                   graph=session.graph)
def tf_fastfood_transform(in_x, dd, DD, use_get=False, use_C=False):
    '''Transform from d to D. Pads as necessary.

    For now: assume dd and DD are known in python.

    Args:
        in_x: 1-D input of length ``dd``; it is zero-padded on the right up to
            ``LL``, the smallest power of two that is >= ``DD``.
        dd: python int, input dimensionality d.
        DD: python int, output dimensionality D (must be >= ``dd``).
        use_get: create the variables with ``tf.get_variable`` instead of
            ``tf.Variable``.
        use_C: use the ``'c'`` Walsh-Hadamard implementation (normalized)
            instead of the ``'two'`` one (unnormalized).

    Returns:
        Tuple ``(fastfood_vars, ret)`` where ``fastfood_vars`` is the list
        ``[B, Pi, G, divisor]`` of non-trainable variables and ``ret`` is the
        transformed tensor of length ``DD``.

    Raises:
        AssertionError: if ``dd``/``DD`` are not ints, ``DD < dd`` or ``dd <= 0``.
    '''
    # Python d and D
    assert isinstance(dd, int), 'd should be int'
    assert isinstance(DD, int), 'D should be int'
    assert DD >= dd, 'd cannot be larger than D'
    assert dd > 0, 'd and D must be positive'

    ll = int(np.ceil(np.log(DD) / np.log(2)))
    LL = 2 ** ll

    # Make vars
    init_BB = tf.to_float(tf.random_uniform((LL,), 0, 2, dtype='int32')) * 2 - 1  # random +/-1
    init_Pi = tf.random_shuffle(tf.range(LL))  # random permutation
    init_GG = tf.random_normal((LL,))
    # The divisor must use the *initialized* value of G so that it can be
    # evaluated while the variables are being initialised.
    init_divisor = lambda GG: tf.sqrt(LL * tf.reduce_sum(tf.pow(GG.initialized_value(), 2)))
    if use_get:
        BB = tf.get_variable('B', initializer=init_BB, trainable=False)
        Pi = tf.get_variable('Pi', initializer=init_Pi, trainable=False)
        GG = tf.get_variable('G', initializer=init_GG, trainable=False)
        divisor = tf.get_variable('divisor', initializer=init_divisor(GG), trainable=False)
    else:
        BB = tf.Variable(init_BB, name='B', trainable=False)
        Pi = tf.Variable(init_Pi, name='Pi', trainable=False)
        GG = tf.Variable(init_GG, name='G', trainable=False)
        divisor = tf.Variable(init_divisor(GG), name='divisor', trainable=False)

    fastfood_vars = [BB, Pi, GG, divisor]

    # Implement transform: B -> H -> Pi -> G -> H, then scale and truncate to D
    dd_pad = tf.pad(in_x, [[0, LL - dd]])
    mul_1 = tf.multiply(BB, dd_pad)
    if use_C:
        mul_2 = tf_fast_walsh_hadamard(mul_1, 0, method='c', normalize=True)
    else:
        mul_2 = tf_fast_walsh_hadamard(mul_1, 0, method='two', normalize=False)
    mul_3 = tf.gather(mul_2, Pi)
    mul_4 = tf.multiply(mul_3, GG)
    if use_C:
        mul_5 = tf_fast_walsh_hadamard(mul_4, 0, method='c', normalize=True)
        # print() with a single argument behaves the same on Python 2 and 3;
        # the former print statement was a SyntaxError on Python 3.
        print('\nWARNING: check normalization on this next line more carefully\n')
        ret = tf.divide(tf.slice(mul_5, [0], [DD]), divisor * np.sqrt(float(DD) / LL / ll))
    else:
        mul_5 = tf_fast_walsh_hadamard(mul_4, 0, method='two', normalize=False)
        ret = tf.divide(tf.slice(mul_5, [0], [DD]), divisor * np.sqrt(float(DD) / LL))

    return fastfood_vars, ret
def logG(x, y, theta):
    """Log-density of ``x`` under a Gaussian with mean ``y + f*h`` and variance ``h*g**2``.

    ``f`` and ``g`` are the values of ``tff(theta, y)`` and ``tfg(theta, y)``
    and ``h`` is ``gl.h``. The result is
    ``-log(2*pi*h*g**2)/2 - (x - mean)**2 / (2*h*g**2)``.
    """
    drift = tff(theta, y)
    diffusion = tfg(theta, y)

    mean = tf.add(y, tf.multiply(drift, gl.h))
    sq_residual = tf.square(tf.subtract(x, mean))
    sq_diffusion = tf.square(diffusion)

    two = tf.constant(2.0, dtype=gl.myftype)
    pi = tf.constant(np.pi, dtype=gl.myftype)

    # Normalisation term: -log(2*pi*h*g^2) / 2
    log_normaliser = tf.negative(
        tf.divide(tf.log(tf.multiply(two * pi * gl.h, sq_diffusion)), two))
    # Quadratic term: -(x - mean)^2 / (2*h*g^2)
    log_kernel = tf.negative(
        tf.divide(sq_residual, tf.multiply(two * gl.h, sq_diffusion)))
    return tf.add(log_normaliser, log_kernel)
def init_training_graph(self):
    """Create logits, loss, per-class metric summaries and prediction ops.

    Sets ``logits``, ``predictions``, ``loss``, ``accuracy``, ``cm``,
    ``train_prediction`` and ``test_prediction`` on ``self``. For every label
    index below ``self.NUM_LABELS`` precision, recall, F1 and a mean-accuracy
    score are written as scalar summaries, and the confusion matrix is
    written as an image summary. A default session must be active because
    the initializer is run directly.
    """
    with tf.name_scope('Evaluation'):
        self.logits = self.conv_layer_f(self.last, self.logits_weight,
                                        strides=[1, 1, 1, 1],
                                        scope_name="logits/")
        self.predictions = tf.argmax(self.logits, axis=3)

        with tf.name_scope('Loss'):
            sparse_labels = tf.squeeze(
                tf.cast(self.train_labels_node, tf.int32), squeeze_dims=[3])
            pixel_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.logits, labels=sparse_labels, name="entropy")
            self.loss = tf.reduce_mean(pixel_entropy)
            tf.summary.scalar("entropy", self.loss)

        with tf.name_scope('Accuracy'):
            labels_int64 = tf.squeeze(
                tf.cast(self.train_labels_node, tf.int64), squeeze_dims=[3])
            is_correct = tf.equal(self.predictions, labels_int64)
            self.accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
            tf.summary.scalar("accuracy", self.accuracy)

        with tf.name_scope('ClassPrediction'):
            flat_labels = tf.reshape(labels_int64, [-1])
            flat_predictions = tf.reshape(self.predictions, [-1])
            self.cm = tf.confusion_matrix(flat_labels, flat_predictions,
                                          self.NUM_LABELS)
            # These two results are not used below; the ops are still created
            # so that auto-generated op names in this scope stay unchanged.
            _flat_cm = tf.reshape(self.cm, [-1])
            _cm_total = tf.reduce_sum(self.cm)

            for label_idx in range(self.NUM_LABELS):
                tag = "Label_{}".format(label_idx)
                TP, TN, FP, FN = GetCMInfo_TF(self.cm, label_idx,
                                              self.NUM_LABELS)
                precision = tf.divide(TP, tf.add(TP, FP))
                recall = tf.divide(TP, tf.add(TP, FN))
                num = tf.multiply(precision, recall)
                dem = tf.add(precision, recall)
                F1 = tf.scalar_mul(2, tf.divide(num, dem))
                # TN / (TN + FN), averaged with the precision of this label.
                negative_precision = tf.divide(TN, tf.add(TN, FN))
                mean_acc = tf.divide(tf.add(precision, negative_precision), 2)
                tf.summary.scalar(tag + '_Precision', precision)
                tf.summary.scalar(tag + '_Recall', recall)
                tf.summary.scalar(tag + '_F1', F1)
                tf.summary.scalar(tag + '_Performance', mean_acc)

            # Image summaries need a [batch, height, width, channels] tensor.
            cm_as_float = tf.cast(self.cm, tf.float32)
            confusion_image = tf.reshape(
                cm_as_float, [1, self.NUM_LABELS, self.NUM_LABELS, 1])
            tf.summary.image('confusion', confusion_image)

    self.train_prediction = tf.nn.softmax(self.logits)
    self.test_prediction = self.train_prediction

    tf.global_variables_initializer().run()
    print('Computational graph initialised')
def adloss(self,x,xt,y,global_step):
    """Build the adversarial and centroid-alignment losses for a source/target batch pair.

    Args:
        x: source-domain inputs, passed to ``self.inference``.
        xt: target-domain inputs, passed to ``self.inference``.
        y: source labels; ``argmax`` over axis 1 is taken, so presumably
            one-hot encoded (TODO confirm against the caller).
        global_step: not used inside this method.

    Returns:
        Tuple ``(G_loss, D_loss, source_centroid, target_centroid)``; both
        losses are scaled by 0.1 before being returned.

    Side effects: sets ``source_feature``, ``target_feature``,
    ``concat_feature``, ``decay``, ``Semanticloss``, ``D_loss`` and ``G_loss``
    on ``self`` and registers three scalar summaries.
    """
    # Run the shared feature extractor on both domains with reused variables.
    # ``self.inference`` publishes its results through self.feature/self.output,
    # so they have to be captured right after each call.
    with tf.variable_scope('reuse_inference') as scope:
        scope.reuse_variables()
        self.inference(x,training=True)
        source_feature=self.feature
        scope.reuse_variables()
        self.inference(xt,training=True)
        target_feature=self.feature
        target_pred=self.output
    # Same discriminator D for both domains (variables reused on the second call).
    with tf.variable_scope('reuse') as scope:
        source_logits,_=D(source_feature)
        scope.reuse_variables()
        target_logits,_=D(target_feature)
    self.source_feature=source_feature
    self.target_feature=target_feature
    self.concat_feature=tf.concat([source_feature,target_feature],0)
    # Class ids: true labels for the source, predicted labels for the target.
    source_result=tf.argmax(y,1)
    target_result=tf.argmax(target_pred,1)
    # Per-class sample counts, in the shape of the features.
    # NOTE(review): ``ones`` is shaped like source_feature but is also used for
    # the target counts, which presumably requires equal batch sizes — confirm.
    ones=tf.ones_like(source_feature)
    current_source_count=tf.unsorted_segment_sum(ones,source_result,self.num_classes)
    current_target_count=tf.unsorted_segment_sum(ones,target_result,self.num_classes)
    # Clamp counts to at least 1 so empty classes do not divide by zero.
    current_positive_source_count=tf.maximum(current_source_count,tf.ones_like(current_source_count))
    current_positive_target_count=tf.maximum(current_target_count,tf.ones_like(current_target_count))
    # Per-class mean feature (centroid) of the current batch.
    current_source_centroid=tf.divide(tf.unsorted_segment_sum(data=source_feature,segment_ids=source_result,num_segments=self.num_classes),current_positive_source_count)
    current_target_centroid=tf.divide(tf.unsorted_segment_sum(data=target_feature,segment_ids=target_result,num_segments=self.num_classes),current_positive_target_count)
    # Blend the batch centroids with the stored moving centroids.
    decay=tf.constant(0.3)
    self.decay=decay
    target_centroid=(decay)*current_target_centroid+(1.-decay)*self.target_moving_centroid
    source_centroid=(decay)*current_source_centroid+(1.-decay)*self.source_moving_centroid
    self.Semanticloss=protoloss(source_centroid,target_centroid)
    tf.summary.scalar('semanticloss',self.Semanticloss)
    # Discriminator objective: target features labelled 1, source features labelled 0.
    D_real_loss=tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=target_logits,labels=tf.ones_like(target_logits)))
    D_fake_loss=tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=source_logits,labels=tf.zeros_like(source_logits)))
    self.D_loss=D_real_loss+D_fake_loss
    # The generator loss is the negated discriminator loss.
    self.G_loss=-self.D_loss
    # Summaries are written from the unscaled generator loss.
    tf.summary.scalar('G_loss',self.G_loss)
    tf.summary.scalar('JSD',self.G_loss/2+math.log(2))
    self.G_loss=0.1*self.G_loss
    self.D_loss=0.1*self.D_loss
    return self.G_loss,self.D_loss,source_centroid,target_centroid
def read_tensor_from_image_file(file_name):
    """Decode an image file, normalise it and return resized overlapping patches.

    The decoder is chosen from the file extension (png, gif, bmp, otherwise
    jpeg). The image is normalised as ``(image - input_mean) / input_std``,
    cut into ``patch_height`` x ``patch_width`` patches with a stride of a
    quarter patch in each direction, and every patch is resized to
    ``input_size`` x ``input_size``. ``input_size``, ``input_mean``,
    ``input_std``, ``patch_height`` and ``patch_width`` are names resolved
    outside this function.

    Args:
        file_name: path of the image file; must be a python string because
            its suffix is inspected.

    Returns:
        List ``[patches, patches_shape]`` of evaluated arrays: the resized
        patches and the shape of the raw ``extract_image_patches`` output.
    """
    width = input_size
    height = input_size
    num_channels = 3

    file_reader = tf.read_file(file_name, "file_reader")
    if file_name.endswith(".png"):
        image_reader = tf.image.decode_png(file_reader, channels=3,
                                           name='png_reader')
    elif file_name.endswith(".gif"):
        image_reader = tf.squeeze(tf.image.decode_gif(file_reader,
                                                      name='gif_reader'))
    elif file_name.endswith(".bmp"):
        image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')
    else:
        image_reader = tf.image.decode_jpeg(file_reader, channels=3,
                                            name='jpeg_reader')

    float_caster = tf.cast(image_reader, tf.float32)
    dims_expander = tf.expand_dims(float_caster, 0)
    normalized = tf.divide(tf.subtract(dims_expander, [input_mean]), [input_std])

    # Floor division keeps the strides integral on Python 3 as well; for int
    # patch sizes it matches the former Python 2 ``/`` result exactly.
    patches = tf.extract_image_patches(
        normalized,
        ksizes=[1, patch_height, patch_width, 1],
        strides=[1, patch_height // 4, patch_width // 4, 1],
        rates=[1, 1, 1, 1],
        padding="VALID")
    patches_shape = tf.shape(patches)
    patches = tf.reshape(patches, [-1, patch_height, patch_width, num_channels])
    patches = tf.image.resize_images(patches, [height, width])
    patches = tf.reshape(patches, [-1, height, width, num_channels])

    # The session used to be left open on every call; the context manager
    # releases it even if ``run`` raises.
    with tf.Session() as sess:
        return sess.run([patches, patches_shape])
def compute_categorical_loss_and_accuracy(logits, targets):
    """Return the total loss, the regularization part of it, and the accuracy.

    The total loss is the mean softmax cross-entropy plus the sum of
    everything in the ``REGULARIZATION_LOSSES`` collection. Accuracy is the
    fraction of rows whose arg-max prediction equals the arg-max of
    ``targets``.
    """
    with tf.variable_scope('loss'):
        reg_terms = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        # Built-in sum yields the plain int 0 when the collection is empty.
        regularization_losses = sum(reg_terms)

        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=targets)
        loss = tf.reduce_mean(cross_entropy, axis=0, name='loss') \
            + regularization_losses

        preds = tf.nn.softmax(logits, name='preds')
        correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(targets, 1),
                                 name='correct_preds')
        num_correct = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
        num_examples = tf.cast(tf.shape(targets)[0], tf.float32)
        accuracy = tf.divide(num_correct, num_examples, name='accuracy')
    return loss, regularization_losses, accuracy
def running_mean(cost, tag_name, batch_size=1):
    """Create ops that keep a running mean of ``cost`` in local variables.

    Args:
        cost: value added to the running sum each time the update op runs.
        tag_name: used for the scopes and as the summary tag.
        batch_size: amount added to the counter on every update.

    Returns:
        Tuple ``(reset_op, update_op, summary_op)``: an op zeroing sum and
        counter, an op accumulating ``cost`` and ``batch_size``, and a scalar
        summary of ``sum / counter``.
    """
    local_vars = tf.GraphKeys.LOCAL_VARIABLES
    with tf.name_scope("running_mean_" + tag_name):
        with tf.variable_scope(tag_name):
            cost_sum = tf.get_variable("cost_sum",
                                       shape=(),
                                       dtype=tf.float64,
                                       initializer=tf.zeros_initializer,
                                       collections=[local_vars],
                                       trainable=False)
            batches = tf.get_variable("cost_num_batches",
                                      shape=(),
                                      dtype=tf.int32,
                                      initializer=tf.zeros_initializer,
                                      collections=[local_vars],
                                      trainable=False)

        # Accumulate.
        cost_add = tf.assign_add(cost_sum, tf.cast(cost, dtype=tf.float64))
        batches_add = tf.assign_add(batches, batch_size)
        update_cost_mean = tf.group(cost_add, batches_add)

        # Reset.
        reset_batches = tf.assign(batches, 0)
        reset_cost_sum = tf.assign(cost_sum, 0.0)
        reset_cost_mean = tf.group(reset_batches, reset_cost_sum)

        # Mean over everything accumulated since the last reset.
        batches_as_float = tf.cast(batches, dtype=tf.float64)
        mean_cost = tf.divide(cost_sum, batches_as_float)
        train_loss_summary = tf.summary.scalar(tag_name, mean_cost)

    return reset_cost_mean, update_cost_mean, train_loss_summary
def normalize_to_unit_sum(x, EPS=1e-10):
    '''Normalize ``x`` so that it sums to one along the last dim.

    ``EPS`` is added to every element before normalizing so that an all-zero
    slice does not divide by zero.

    Args:
        x: tensor (or tensor-like value) to normalize.
        EPS: python float added to every element.

    Returns:
        Tensor of the same shape as ``x`` whose last dimension sums to one.
    '''
    x = tf.convert_to_tensor(x)
    # Build the epsilon in the dtype of ``x``: a hard-coded float32 constant
    # made the addition fail for any other floating-point input.
    eps = tf.constant(EPS, dtype=x.dtype.base_dtype)
    shifted = x + eps
    total = tf.reduce_sum(shifted, -1, keep_dims=True)
    return tf.divide(shifted, total)
def dia(model, config, scope, connectsegment, connectfeature):
    """Add a pairwise-similarity layer to ``model`` under the key prefix ``scope``.

    For every batch item the ``speaker_size`` highest-scoring positions of
    the segment input are selected with ``tf.nn.top_k``, the feature rows at
    those positions are gathered and L2-normalised, and the output is
    ``0.5 + 0.5 * (scores . scores^T)``.

    Args:
        model: dict of tensors and lengths; read with the ``connectsegment``
            and ``connectfeature`` prefixes and extended in place.
        config: object whose ``getint('global', 'speaker_size')`` gives the
            number of rows to select.
        scope: name of the variable/name scope and prefix of the new keys.
        connectsegment: key prefix of the layer providing the segment scores.
        connectfeature: key prefix of the layer providing the features.

    Returns:
        The same ``model`` dict with the new ``'<scope>_...'`` entries.
    """
    speaker_size = config.getint('global', 'speaker_size')

    with tf.variable_scope(scope), tf.name_scope(scope):
        with tf.variable_scope('inputs'), tf.name_scope('inputs'):
            model['%s_in0length_segment' %scope] = model['%s_out0length' %connectsegment]
            model['%s_in1length_segment' %scope] = model['%s_out1length' %connectsegment]
            model['%s_in2length_segment' %scope] = model['%s_out2length' %connectsegment]
            model['%s_maxin2length_segment' %scope] = model['%s_maxout2length' %connectsegment]
            model['%s_in0length_feature' %scope] = model['%s_out0length' %connectfeature]
            model['%s_in1length_feature' %scope] = model['%s_out1length' %connectfeature]
            model['%s_in2length_feature' %scope] = model['%s_out2length' %connectfeature]
            model['%s_maxin2length_feature' %scope] = model['%s_maxout2length' %connectfeature]
            model['%s_inputs_segment' %scope] = tf.squeeze(model['%s_outputs' %connectsegment], 2, '%s_inputs_segment' %scope)
            # Swap the first two axes and split into one tensor per leading index.
            model['%s_inputs_feature' %scope] = tf.unstack(tf.transpose(model['%s_outputs' %connectfeature], [1, 0, 2]), name = '%s_inputs_feature' %scope)
            model['%s_out0length' %scope] = model['%s_in0length_feature' %scope]
            model['%s_out1length' %scope] = speaker_size
            # ``range`` works on Python 2 and 3 (``xrange`` is a NameError on
            # Python 3); out0length has to be a python int here.
            model['%s_out2length' %scope] = tf.stack([speaker_size for _ in range(model['%s_out0length' %scope])])
            model['%s_maxout2length' %scope] = speaker_size

        with tf.variable_scope('outputs'), tf.name_scope('outputs'):
            model['%s_topsegmentvalues' %scope], model['%s_topsegmentindices' %scope] = tf.nn.top_k(tf.transpose(model['%s_inputs_segment' %scope], [1, 0]), speaker_size)
            model['%s_scores' %scope] = [tf.gather(feature, index) for feature, index in zip(model['%s_inputs_feature' %scope], tf.unstack(model['%s_topsegmentindices' %scope]))]
            # Unit-normalise each row so the matmul below yields cosine similarities.
            model['%s_normalizedscores' %scope] = [tf.divide(score, tf.norm(score, 2, 1, True)) for score in model['%s_scores' %scope]]
            # Map the similarities from [-1, 1] into [0, 1].
            model['%s_outputs' %scope] = tf.add(0.5, tf.multiply(0.5, tf.stack([tf.matmul(score, score, transpose_b = True) for score in model['%s_normalizedscores' %scope]], name = '%s_outputs' %scope)))

    return model
def read_tensor_from_image_file(self, file_name, input_height=299, input_width=299, input_mean=0, input_std=255):
    """Load an image file and return it resized and normalised.

    The decoder is chosen from the file extension (png, gif, bmp, otherwise
    jpeg). The image is resized bilinearly and normalised as
    ``(image - input_mean) / input_std``.

    Args:
        file_name: path of the image file; must be a python string because
            its suffix is inspected.
        input_height: currently not used, see the note in the body.
        input_width: currently not used, see the note in the body.
        input_mean: value subtracted from every pixel.
        input_std: value every pixel is divided by.

    Returns:
        The evaluated result of the normalisation op, with a leading batch
        axis of size one.
    """
    file_reader = tf.read_file(file_name, "file_reader")
    if file_name.endswith(".png"):
        image_reader = tf.image.decode_png(file_reader, channels=3,
                                           name='png_reader')
    elif file_name.endswith(".gif"):
        image_reader = tf.squeeze(tf.image.decode_gif(file_reader,
                                                      name='gif_reader'))
    elif file_name.endswith(".bmp"):
        image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')
    else:
        image_reader = tf.image.decode_jpeg(file_reader, channels=3,
                                            name='jpeg_reader')

    float_caster = tf.cast(image_reader, tf.float32)
    dims_expander = tf.expand_dims(float_caster, 0)
    # NOTE(review): the target size comes from the instance attributes, not
    # from the ``input_height``/``input_width`` arguments. Kept as is so
    # existing callers see no change; confirm which one is intended.
    resized = tf.image.resize_bilinear(dims_expander,
                                       [self.input_height, self.input_width])
    normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])

    # The context manager closes the session even if ``run`` raises; the
    # explicit close() used before was skipped on errors.
    with tf.Session() as sess:
        return sess.run(normalized)
def gaussian(y, mu_k, sigma_k):
    """Evaluate ``exp(-||y - mu_k||^2 / (2 * sigma_k^2)) / sigma_k``.

    ``y`` is reshaped to ``[batchDim, 1, L_out]`` so that it broadcasts
    against ``mu_k``; the squared distance is summed over the last axis.
    """
    y = tf.reshape(y, [batchDim, 1, L_out])
    # Summing over the L dimensions leaves one value per (sample, component).
    squared_distance = tf.reduce_sum(tf.square(y - mu_k), axis=2)
    exponent = -tf.div(squared_distance, 2 * tf.square(sigma_k))
    kernel = tf.exp(exponent)
    return tf.divide(kernel, sigma_k)
def tf_image_processing(tf_images, basenet, crop_size, distort=False, hp_filter=False):
    """Preprocess an image tensor for the given base network.

    * ``'sketchanet'``: subtract the mean value 250.42.
    * ``'inceptionv1'``, ``'inceptionv3'``, ``'gen_cnn'``: rescale from
      [0, 255] to [-1, 1] and repeat the channels three times when the input
      does not already have 3 channels.

    In both cases the images are then passed to ``data_augmentation`` (when
    ``distort`` is true, after storing ``crop_size`` and the channel count in
    ``FLAGS``) or resized to ``crop_size`` x ``crop_size``. Any other
    ``basenet`` leaves the images untouched apart from the rank expansion.
    Finally the high-pass filter is applied when ``hp_filter`` is true.
    """

    def _augment_or_resize(images, channels):
        # Shared tail of both branches: distort or plain resize.
        if not distort:
            return tf.image.resize_images(images, (crop_size, crop_size))
        print("Distorting photos")
        FLAGS.crop_size = crop_size
        FLAGS.dist_chn_size = channels
        return data_augmentation(images)

    # Rank-3 input gets a trailing channel axis.
    if len(tf_images.shape) == 3:
        tf_images = tf.expand_dims(tf_images, -1)

    if basenet == 'sketchanet':
        mean_value = 250.42
        tf_images = tf.subtract(tf_images, mean_value)
        tf_images = _augment_or_resize(tf_images, 1)
    elif basenet in ('inceptionv1', 'inceptionv3', 'gen_cnn'):
        tf_images = tf.divide(tf_images, 255.0)
        tf_images = tf.subtract(tf_images, 0.5)
        tf_images = tf.multiply(tf_images, 2.0)
        if int(tf_images.shape[-1]) != 3:
            tf_images = tf.concat([tf_images] * 3, axis=-1)
        tf_images = _augment_or_resize(tf_images, 3)

    if hp_filter:
        tf_images = tf_high_pass_filter(tf_images)
    return tf_images
def InstanceNorm(x, epsilon=1e-5, data_format='NHWC', use_affine=True):
    """
    Instance Normalization, as in the paper:
    `Instance Normalization: The Missing Ingredient for Fast Stylization
    <https://arxiv.org/abs/1607.08022>`_.

    Mean and variance are computed over the two spatial axes of every sample
    and channel.

    Args:
        x (tf.Tensor): a 4D tensor.
        epsilon (float): avoid divide-by-zero
        data_format (str): 'NHWC' puts the channels last; any other value is
            treated as channels-first.
        use_affine (bool): whether to apply learnable affine transformation
    """
    shape = x.get_shape().as_list()
    assert len(shape) == 4, "Input of InstanceNorm has to be 4D!"

    channels_last = data_format == 'NHWC'
    ch = shape[3] if channels_last else shape[1]
    axis = [1, 2] if channels_last else [2, 3]
    new_shape = [1, 1, 1, ch] if channels_last else [1, ch, 1, 1]
    assert ch is not None, "Input of InstanceNorm require known channel!"

    mean, var = tf.nn.moments(x, axis, keep_dims=True)

    if use_affine:
        # beta starts at zero and gamma at one; both are reshaped so that
        # they broadcast along the channel axis.
        beta = tf.reshape(
            tf.get_variable('beta', [ch],
                            initializer=tf.constant_initializer()),
            new_shape)
        gamma = tf.reshape(
            tf.get_variable('gamma', [ch],
                            initializer=tf.constant_initializer(1.0)),
            new_shape)
        return tf.nn.batch_normalization(x, mean, var, beta, gamma, epsilon,
                                         name='output')

    return tf.divide(x - mean, tf.sqrt(var + epsilon), name='output')
def ApplyPcaAndWhitening(data, pca_matrix, pca_mean, output_dim,
                         use_whitening=False, pca_variances=None):
    """Projects data with PCA and optionally whitens the result.

    Args:
      data: [N, dim] float tensor containing data which undergoes
        PCA/whitening.
      pca_matrix: [dim, dim] float tensor PCA matrix, row-major.
      pca_mean: [dim] float tensor, mean to subtract before projection.
      output_dim: Number of dimensions to use in output data, of type int.
      use_whitening: Whether whitening is to be used.
      pca_variances: [dim] float tensor containing PCA variances. Only used if
        use_whitening is True.

    Returns:
      output: [N, output_dim] float tensor with output of PCA/whitening
        operation.
    """
    centered = tf.subtract(data, pca_mean)
    # Keep only the first `output_dim` rows of the PCA matrix.
    projection = tf.slice(pca_matrix, [0, 0], [output_dim, -1])
    output = tf.matmul(centered, projection, transpose_b=True,
                       name='pca_matmul')

    if not use_whitening:
        return output

    # Whitening divides every component by its standard deviation.
    std_dev = tf.sqrt(tf.slice(pca_variances, [0], [output_dim]))
    return tf.divide(output, std_dev, name='whitening')
def compute_loss(self, input_tensor, label, name):
    """
    Compute the loss.

    The logits are multiplied element-wise by bounded inverse class weights
    ``1 / log(1.02 + softmax(logits))`` before the sparse softmax
    cross-entropy is taken.

    :param input_tensor: input passed to ``self.build_model``
    :param label: label tensor; its axis 3 is squeezed away
    :param name: variable scope name
    :return: dict with ``'entropy_loss'`` (the un-reduced cross-entropy) and
             ``'inference_logits'`` (the unweighted logits)
    """
    with tf.variable_scope(name):
        # Forward pass to obtain the logits
        inference_ret = self.build_model(input_tensor=input_tensor,
                                         name='inference')
        decode_logits = inference_ret['logits']

        # Bounded inverse class weights
        offset = tf.constant(1.02, tf.float32)
        class_probabilities = tf.nn.softmax(decode_logits)
        inverse_class_weights = tf.divide(
            1.0, tf.log(tf.add(offset, class_probabilities)))
        decode_logits_weighted = tf.multiply(decode_logits,
                                             inverse_class_weights)

        sparse_label = tf.squeeze(label, squeeze_dims=[3])
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=decode_logits_weighted,
            labels=sparse_label,
            name='entropy_loss')

        return {
            'entropy_loss': loss,
            'inference_logits': inference_ret['logits'],
        }
def compute_loss(self, y_true, y_pred):
    """Sum, over the feature layers, the squared feature distance per sample.

    Both inputs are passed through ``self.feat_net``. For every feature layer
    the activations are normalised with ``norm_vgg``, the squared difference
    is summed over axes 1-3 and divided by ``height * width`` of that layer.
    The per-layer results are added up. When the feature network exposes no
    outputs an empty list is returned.
    """
    import tensorflow as tf  # imported here so it becomes part of the model

    n_feature_layers = len(self.feat_net.outputs)
    true_features = self.feat_net(y_true)
    pred_features = self.feat_net(y_pred)

    total = []
    for layer_idx in range(n_feature_layers):
        true_layer = true_features[layer_idx]
        pred_layer = pred_features[layer_idx]

        # Normalise the activations before comparing them.
        true_unit = norm_vgg(true_layer)
        pred_unit = norm_vgg(pred_layer)

        spatial_size = tf.shape(true_layer)[1] * tf.shape(true_layer)[2]
        # One value per batch item.
        distance = tf.reduce_sum(tf.square(true_unit - pred_unit), [1, 2, 3])
        layer_loss = tf.divide(distance, tf.cast(spatial_size, tf.float32))

        total = layer_loss if layer_idx == 0 else total + layer_loss
    return total
def _compute_loss(self, prediction_tensor, target_tensor, weights):
    """Compute loss function.

    The targets go through ``self._scale_and_softmax_logits`` and the
    predictions are divided by ``self._logit_scale`` before the softmax
    cross-entropy between the two is taken per anchor.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing the predicted logits for each class
      target_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing logit classification targets
      weights: a float tensor of shape, either [batch_size, num_anchors,
        num_classes] or [batch_size, num_anchors, 1]. If the shape is
        [batch_size, num_anchors, 1], all the classes are equally weighted.

    Returns:
      loss: a float tensor of shape [batch_size, num_anchors]
        representing the value of the loss function.
    """
    # Collapse the class axis of the weights to one value per anchor.
    anchor_weights = tf.reduce_mean(weights, axis=2)
    num_classes = prediction_tensor.get_shape().as_list()[-1]

    soft_targets = self._scale_and_softmax_logits(target_tensor)
    scaled_logits = tf.divide(prediction_tensor, self._logit_scale,
                              name='scale_logits')

    # Cross-entropy works on rows, so flatten batch and anchors together.
    flat_targets = tf.reshape(soft_targets, [-1, num_classes])
    flat_logits = tf.reshape(scaled_logits, [-1, num_classes])
    per_row_cross_ent = tf.nn.softmax_cross_entropy_with_logits(
        labels=flat_targets, logits=flat_logits)

    unflattened = tf.reshape(per_row_cross_ent, tf.shape(anchor_weights))
    return unflattened * anchor_weights
def __init__(self, state_values, cumulative_rewards, logits, actions,
             action_space, beta):
    """Build an advantage-reweighted log-likelihood loss in ``self.loss``.

    The advantage ``cumulative_rewards - state_values`` is divided by the
    square root of a slowly updated moving average of its mean square,
    scaled by ``beta`` and exponentiated. The loss is the negative mean of
    these weights (with gradients stopped) times the log-probability of
    ``actions`` under the distribution built from ``logits``.
    """
    ma_adv_norm = tf.get_variable(
        name="moving_average_of_advantage_norm",
        dtype=tf.float32,
        initializer=100.0,
        trainable=False)

    # advantage estimation
    adv = cumulative_rewards - state_values

    # move the averaged advantage norm towards the current mean square
    mean_square_adv = tf.reduce_mean(tf.square(adv))
    update_adv_norm = tf.assign_add(
        ref=ma_adv_norm, value=1e-6 * (mean_square_adv - ma_adv_norm))

    # exponentially weighted advantages, built after the update op
    with tf.control_dependencies([update_adv_norm]):
        normalized_adv = tf.divide(adv, 1e-8 + tf.sqrt(ma_adv_norm))
        exp_advs = tf.exp(beta * normalized_adv)

    # log\pi_\theta(a|s)
    dist_cls, _ = ModelCatalog.get_action_dist(action_space, {})
    logprobs = dist_cls(logits).logp(actions)

    weighted_logprobs = tf.stop_gradient(exp_advs) * logprobs
    self.loss = -1.0 * tf.reduce_mean(weighted_logprobs)
def __init__(self, n_inputs, n_rules, learning_rate=1e-2):
    """Build a fuzzy-rule regression graph with Gaussian membership functions.

    Creates the placeholders ``inputs`` and ``targets``, the rule
    activations ``rul``, the model output ``out``, a Huber ``loss``, an Adam
    ``optimize`` step and the variable initializer ``init_variables``.

    :param n_inputs: number of input features
    :param n_rules: number of fuzzy rules
    :param learning_rate: learning rate of the Adam optimizer
    """
    self.n = n_inputs
    self.m = n_rules
    self.inputs = tf.placeholder(tf.float32, shape=(None, n_inputs))  # Input
    self.targets = tf.placeholder(tf.float32, shape=None)  # Desired output

    # Means of the Gaussian membership functions
    mu = tf.get_variable("mu", [n_rules * n_inputs],
                         initializer=tf.random_normal_initializer(0, 1))
    # Standard deviations of the Gaussian membership functions
    sigma = tf.get_variable("sigma", [n_rules * n_inputs],
                            initializer=tf.random_normal_initializer(0, 1))
    # Consequent centers, one per rule
    y = tf.get_variable("y", [1, n_rules],
                        initializer=tf.random_normal_initializer(0, 1))

    self.params = tf.trainable_variables()

    # Rule activations: the product over the inputs of one Gaussian
    # membership value per (rule, input) pair.
    tiled_inputs = tf.tile(self.inputs, (1, n_rules))
    memberships = tf.exp(
        -0.5 * tf.square(tf.subtract(tiled_inputs, mu)) / tf.square(sigma))
    self.rul = tf.reduce_prod(
        tf.reshape(memberships, (-1, n_rules, n_inputs)), axis=2)

    # Output: activation-weighted average of the centers, with the
    # denominator clipped away from zero.
    num = tf.reduce_sum(tf.multiply(self.rul, y), axis=1)
    den = tf.clip_by_value(tf.reduce_sum(self.rul, axis=1), 1e-12, 1e12)
    self.out = tf.divide(num, den)

    self.loss = tf.losses.huber_loss(self.targets, self.out)  # Loss function
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    self.optimize = optimizer.minimize(self.loss)  # Optimization step
    self.init_variables = tf.global_variables_initializer()  # Variable initializer
def _ProcessSingleScale(scale_index, boxes, features, scales, scores,
                        reuse=True):
    """Extract and select features at one image scale; append to the inputs.

    This function will be passed into tf.while_loop() and be called
    repeatedly. The incoming tensors hold the results of the previous
    iterations. The current scale is image_scales[scale_index]; the image is
    resized to it, the model is run, and the locations whose attention score
    is at least abs_thres are kept and concatenated to the incoming tensors.

    Args:
      scale_index: A valid index in the image_scales.
      boxes: Box tensor with the shape of [N, 4].
      features: Feature tensor with the shape of [N, depth].
      scales: Scale tensor with the shape of [N].
      scores: Attention score tensor with the shape of [N].
      reuse: Whether or not the layer and its variables should be reused.

    Returns:
      scale_index: The next scale index for processing.
      boxes: Concatenated box tensor with the shape of [K, 4]. K >= N.
      features: Concatenated feature tensor with the shape of [K, depth].
      scales: Concatenated scale tensor with the shape of [K].
      scores: Concatenated attention score tensor with the shape of [K].
    """
    scale = tf.gather(image_scales, scale_index)

    # Resize the image to the current scale and run the model on it.
    scaled_size = tf.to_int32(tf.round(original_image_shape_float * scale))
    scaled_image = tf.image.resize_bilinear(image_tensor, scaled_size)
    attention, feature_map = model_fn(
        scaled_image, normalized_image=True, reuse=reuse)

    map_height = tf.shape(feature_map)[1]
    map_width = tf.shape(feature_map)[2]
    rf_boxes = CalculateReceptiveBoxes(map_height, map_width, rf, stride,
                                       padding)
    # Re-project back to the original image space.
    rf_boxes = tf.divide(rf_boxes, scale)

    flat_attention = tf.reshape(attention, [-1])
    flat_features = tf.reshape(feature_map, [-1, feature_depth])

    # Use attention score to select feature vectors.
    keep = tf.reshape(tf.where(flat_attention >= abs_thres), [-1])
    kept_boxes = tf.gather(rf_boxes, keep)
    kept_features = tf.gather(flat_features, keep)
    kept_scores = tf.gather(flat_attention, keep)
    kept_scales = tf.ones_like(kept_scores, tf.float32) / scale

    # Concat with the previous result from different scales.
    return (scale_index + 1,
            tf.concat([boxes, kept_boxes], 0),
            tf.concat([features, kept_features], 0),
            tf.concat([scales, kept_scales], 0),
            tf.concat([scores, kept_scores], 0))
def r2_op(predictions, targets):
    """ r2_op.

    An op that calculates the coefficient of determination (R2):
    one minus the ratio of the residual sum of squares to the total sum of
    squares of `targets` around their mean.

    Examples:
        ```python
        input_data = placeholder(shape=[None, 784])
        y_pred = my_network(input_data) # Apply some ops
        y_true = placeholder(shape=[None, 10]) # Labels
        r2 = r2_op(y_pred, y_true)

        # Calculate R2 by feeding data X and labels Y
        r2_value = sess.run(r2, feed_dict={input_data: X, y_true: Y})
        ```

    Arguments:
        predictions: `Tensor`.
        targets: `Tensor`.

    Returns:
        `Float`. The R2 score.

    """
    with tf.name_scope('StandardError'):
        residuals = tf.subtract(targets, predictions)
        residual_ss = tf.reduce_sum(tf.square(residuals))
        deviations = tf.subtract(targets, tf.reduce_mean(targets))
        total_ss = tf.reduce_sum(tf.square(deviations))
        return tf.subtract(1.0, tf.divide(residual_ss, total_ss))
def weighted_r2_op(predictions, targets, inputs):
    """ weighted_r2_op.

    An op that calculates the ratio between the sum of squared differences
    of `predictions` and `inputs` and the sum of squared differences of
    `targets` and `inputs`. When `inputs` has a `__len__` attribute its
    elements are first summed with `tf.add_n`.

    Examples:
        ```python
        input_data = placeholder(shape=[None, 784])
        y_pred = my_network(input_data) # Apply some ops
        y_true = placeholder(shape=[None, 10]) # Labels
        wr2 = weighted_r2_op(y_pred, y_true, input_data)

        # Calculate the metric by feeding data X and labels Y
        wr2_value = sess.run(wr2, feed_dict={input_data: X, y_true: Y})
        ```

    Arguments:
        predictions: `Tensor`.
        targets: `Tensor`.
        inputs: `Tensor`.

    Returns:
        `Float`. The ratio described above.

    Raises:
        Exception: if `inputs` and `targets` do not have the same static
            shape.

    """
    with tf.name_scope('WeightedStandardError'):
        if hasattr(inputs, '__len__'):
            inputs = tf.add_n(inputs)

        input_shape = inputs.get_shape().as_list()
        target_shape = targets.get_shape().as_list()
        if input_shape != target_shape:
            raise Exception("Weighted R2 metric requires Inputs and Targets to "
                            "have same shape.")

        prediction_ss = tf.reduce_sum(tf.square(predictions - inputs))
        target_ss = tf.reduce_sum(tf.square(targets - inputs))
        return tf.divide(prediction_ss, target_ss)
def _make_activity_op(self, input_tensor):
    """ Creates the op for calculating the activity of a SOM

    For every input vector the Euclidean distance to every neuron's weight
    vector is computed, then mapped through `exp(c * dist ** 2)`. When
    `self._softmax_activity` is set the result is additionally normalised
    along the neuron axis with a softmax.

    :param input_tensor: A tensor to calculate the activity of. Must be of shape `[batch_size, dim]` where `dim` is
    the dimensionality of the SOM's weights.
    :return A handle to the newly created activity op, shape `[batch_size, neurons]`:
    """
    with self._graph.as_default():
        with tf.name_scope("Activity"):
            # This constant controls the width of the gaussian.
            # The closer to 0 it is, the wider it is.
            # NOTE(review): for the result to be a decaying Gaussian, self._c
            # presumably has to be negative -- confirm where it is set.
            c = tf.constant(self._c, dtype="float32")

            # Pairwise differences via broadcasting:
            # [1, neurons, dim] - [batch_size, 1, dim] -> [batch_size, neurons, dim]
            # (assumes self._weights is [neurons, dim] -- TODO confirm)
            differences = tf.subtract(
                tf.expand_dims(self._weights, axis=0),
                tf.expand_dims(input_tensor, axis=1))

            # Get the euclidean distance between each neuron and the input vectors.
            # The norm must be taken over the feature axis only; without an
            # explicit axis tf.norm reduces the whole tensor to a single scalar.
            dist = tf.norm(differences, axis=-1, name="Distance")  # [batch_size, neurons]

            # Calculate the Gaussian of the activity. Units with distances closer to 0 will have activities
            # closer to 1.
            activity = tf.exp(tf.multiply(tf.pow(dist, 2), c), name="Gaussian")

            # Convert the activity into a softmax probability distribution
            if self._softmax_activity:
                exp_activity = tf.exp(activity)
                normaliser = tf.expand_dims(tf.reduce_sum(exp_activity, axis=1), axis=-1)
                activity = tf.divide(exp_activity, normaliser, name="Softmax")

            return tf.identity(activity, name="Output")
def instancenorm(inputdata, epsilon=1e-5, data_format='NHWC', use_affine=True, name=None):
    """
    Instance normalization: normalizes every sample over its two spatial
    axes, optionally followed by a learned per-channel scale and shift.

    :param inputdata: 4D tensor laid out according to `data_format`
    :param epsilon: small constant added to the variance before the square root
    :param data_format: 'NHWC' for channels-last; any other value is handled as channels-first
    :param use_affine: if False, return the plain normalized tensor (op name 'output')
        without creating the 'beta' / 'gamma' variables
    :param name: name of the final batch_normalization op (affine path only)
    :return: the normalized tensor
    """
    static_shape = inputdata.get_shape().as_list()
    if len(static_shape) != 4:
        raise ValueError("Input data of instancebn layer has to be 4D tensor")

    channels_last = data_format == 'NHWC'
    axis = [1, 2] if channels_last else [2, 3]
    ch = static_shape[3] if channels_last else static_shape[1]
    # Shape used to broadcast the per-channel parameters against the input.
    new_shape = [1, 1, 1, ch] if channels_last else [1, ch, 1, 1]
    if ch is None:
        raise ValueError("Input of instancebn require known channel!")

    mean, var = tf.nn.moments(inputdata, axis, keep_dims=True)

    if not use_affine:
        return tf.divide(inputdata - mean, tf.sqrt(var + epsilon), name='output')

    # beta is created before gamma to keep the variable creation order.
    beta = tf.reshape(
        tf.get_variable('beta', [ch], initializer=tf.constant_initializer()),
        new_shape)
    gamma = tf.reshape(
        tf.get_variable('gamma', [ch], initializer=tf.constant_initializer(1.0)),
        new_shape)
    return tf.nn.batch_normalization(inputdata, mean, var, beta, gamma, epsilon, name=name)
def style_swap(content, style, patch_size, stride):
    '''Efficiently swap content feature patches with nearest-neighbor style patches

    Every content patch is matched, by normalized cross-correlation, against
    all patches extracted from `style`; the best-matching (unnormalized)
    style patch is written back in its place and overlapping regions are
    averaged.

    Original paper: https://arxiv.org/abs/1612.04337
    Adapted from: https://github.com/rtqichen/style-swap/blob/master/lib/NonparametricPatchAutoencoderFactory.lua

    NOTE(review): the patch reshape and both deconv output shapes hard-code a
    leading dimension of 1, so this appears to assume batch size 1 -- confirm.
    '''
    window = [1, patch_size, patch_size, 1]
    step = [1, stride, stride, 1]

    # Num channels of input content feature and style-swapped output
    num_channels = tf.shape(style)[-1]

    ### Extract patches from style image that will be used for conv/deconv layers
    patches = tf.extract_image_patches(style, window, step, [1, 1, 1, 1], 'VALID')
    grid_shape = tf.shape(patches)  # NxRowsxColsxPatch_size*Patch_size*nC
    num_patches = grid_shape[1] * grid_shape[2]
    patches = tf.reshape(patches, [num_patches, patch_size, patch_size, num_channels])
    # Patch_sizexPatch_sizexIn_CxOut_c, i.e. the layout of a conv filter bank
    patches = tf.transpose(patches, [1, 2, 3, 0])

    # Normalize each style patch
    normalized_patches = tf.nn.l2_normalize(patches, dim=3)

    # Compute cross-correlation/nearest neighbors of patches by using style patches as conv filters
    correlation = tf.nn.conv2d(content, normalized_patches, step, 'VALID')

    # For each spatial position find index of max along channel/patch dim
    best_patch = tf.argmax(correlation, axis=3)
    # Num channels in intermediate conv output, same as # of patches
    correlation_channels = tf.shape(correlation)[-1]

    # One-hot encode argmax with same size as the correlation map, with 1's in max channel idx for each spatial pos
    selection = tf.one_hot(best_patch, correlation_channels, 1., 0., 3)

    # Calc size of transposed conv out
    out_height = utils.deconv_output_length(tf.shape(selection)[1], patch_size, 'valid', stride)
    out_width = utils.deconv_output_length(tf.shape(selection)[2], patch_size, 'valid', stride)
    swapped_shape = tf.stack([1, out_height, out_width, num_channels])

    # Deconv back to original content size with highest matching (unnormalized) style patch swapped in for each content patch
    swapped = tf.nn.conv2d_transpose(selection, patches, swapped_shape, step, 'VALID')

    ### Interpolate to average overlapping patch locations
    selection_count = tf.reduce_sum(selection, axis=3, keep_dims=True)
    ones_filter = tf.ones([patch_size, patch_size, 1, 1], dtype=tf.float32)
    # Same spatial size as the swapped output, with 1 channel
    count_shape = tf.stack([1, out_height, out_width, 1])
    overlap = tf.nn.conv2d_transpose(selection_count, ones_filter, count_shape, step, 'VALID')
    # Repeat along channel dim to make same size as the swapped output
    overlap = tf.tile(overlap, [1, 1, 1, num_channels])

    return tf.divide(swapped, overlap)
def setup_loss(self, mini_batch_size):
    """Create the label placeholder and the quadratic cost op.

    Sets `self.expected_output` (float32 placeholder of shape `[None, 10]`)
    and `self.C`, the sum of squared differences between `self.output` and
    the expected output divided by `2 * mini_batch_size`.

    Args:
        mini_batch_size: number used in the `2 * mini_batch_size` normaliser.
    """
    self.expected_output = tf.placeholder(
        tf.float32, shape=[None, 10], name="expected_output")

    error = tf.subtract(self.output, self.expected_output)
    squared_error_sum = tf.reduce_sum(tf.multiply(error, error))
    normaliser = tf.constant(2.0 * mini_batch_size, dtype=tf.float32)
    self.C = tf.divide(squared_error_sum, normaliser)
def batchLoss(out_batch, # [batch_size,(1..2)] tf_result
              target_disparity_batch, # [batch_size] tf placeholder
              gt_ds_batch, # [batch_size,2] tf placeholder
              absolute_disparity = True, #when false there should be no activation on disparity output !
              use_confidence = True,
              lambda_conf_avg = 0.01,
              lambda_conf_pwr = 0.1,
              conf_pwr = 2.0,
              gt_conf_offset = 0.08,
              gt_conf_pwr = 1.0,
              error2_offset = 0.0025): # (0.05^2)
    """Build the weighted disparity loss for one batch.

    Column 0 of `out_batch` / `gt_ds_batch` is used as disparity and column 1
    as confidence / ground-truth strength. Per-tile weights are derived from
    the ground-truth column 1 (offset by `gt_conf_offset`, clamped at 0 and
    optionally raised to `gt_conf_pwr`), optionally multiplied by the
    network's normalized confidence, and normalized to sum to 1.

    Args:
        out_batch: network output; column 0 is read always, column 1 only when
            `use_confidence` is set.
        target_disparity_batch: only read when `absolute_disparity` is False;
            it is subtracted from the ground truth to form the residual target.
        gt_ds_batch: ground truth; both columns are read.
        absolute_disparity: compare the output to the ground truth directly
            (True) or to ground truth minus target disparity (False).
        use_confidence: include the confidence terms (`cost2`, `cost3`) and
            weight tiles by normalized confidence.
        lambda_conf_avg: scale of `cost2`, the squared deviation of the mean
            confidence from 1.
        lambda_conf_pwr: converted to a constant below but not referenced by
            any cost term in this function.
        conf_pwr: exponent applied to the normalized confidence.
        gt_conf_offset: subtracted from ground-truth column 1 before clamping.
        gt_conf_pwr: 0 selects uniform weights, 1.0 skips the power op.
        error2_offset: subtracted from the squared error (clamped at 0) for
            the "biased" cost `cost1b`.

    Returns:
        Tuple `(cost, disp_slice, d_gt_slice, out_diff, out_diff2, w_norm,
        out_wdiff2, cost1)` where `cost` is `cost1b + cost2 + cost3` when
        `use_confidence` is set and `cost1b` otherwise.
    """
    with tf.name_scope("BatchLoss"):
        """ Here confidence should be after relU. Disparity - may be also if absolute, but no activation if output is residual disparity """
        tf_lambda_conf_avg = tf.constant(lambda_conf_avg, dtype=tf.float32, name="tf_lambda_conf_avg")
        # NOTE(review): tf_lambda_conf_pwr, tf_conf_pwr and tf_maxw are created
        # but never consumed below -- confirm whether cost3 / nconf_pwr were
        # meant to use them.
        tf_lambda_conf_pwr = tf.constant(lambda_conf_pwr, dtype=tf.float32, name="tf_lambda_conf_pwr")
        tf_conf_pwr = tf.constant(conf_pwr, dtype=tf.float32, name="tf_conf_pwr")
        tf_gt_conf_offset = tf.constant(gt_conf_offset, dtype=tf.float32, name="tf_gt_conf_offset")
        tf_gt_conf_pwr = tf.constant(gt_conf_pwr, dtype=tf.float32, name="tf_gt_conf_pwr")
        tf_num_tiles = tf.shape(gt_ds_batch)[0]
        tf_0f = tf.constant(0.0, dtype=tf.float32, name="tf_0f")
        tf_1f = tf.constant(1.0, dtype=tf.float32, name="tf_1f")
        tf_maxw = tf.constant(1.0, dtype=tf.float32, name="tf_maxw")
        # Ground-truth based weights.
        if gt_conf_pwr == 0:
            # Uniform weights; relies on the static first dimension of out_batch.
            # NOTE(review): presumably fails when the batch dimension is unknown -- confirm.
            w = tf.ones((out_batch.shape[0]), dtype=tf.float32,name="w_ones")
        else:
            # w_slice = tf.slice(gt_ds_batch,[0,1],[-1,1], name = "w_gt_slice")
            w_slice = tf.reshape(gt_ds_batch[:,1],[-1], name = "w_gt_slice")
            w_sub = tf.subtract (w_slice, tf_gt_conf_offset, name = "w_sub")
            # w_clip = tf.clip_by_value(w_sub, tf_0f,tf_maxw, name = "w_clip")
            # Only clamped from below; there is no upper bound on the weight.
            w_clip = tf.maximum(w_sub, tf_0f, name = "w_clip")
            if gt_conf_pwr == 1.0:
                w = w_clip
            else:
                w=tf.pow(w_clip, tf_gt_conf_pwr, name = "w_pow")

        if use_confidence:
            tf_num_tilesf = tf.cast(tf_num_tiles, dtype=tf.float32, name="tf_num_tilesf")
            # conf_slice = tf.slice(out_batch,[0,1],[-1,1], name = "conf_slice")
            conf_slice = tf.reshape(out_batch[:,1],[-1], name = "conf_slice")
            conf_sum = tf.reduce_sum(conf_slice, name = "conf_sum")
            conf_avg = tf.divide(conf_sum, tf_num_tilesf, name = "conf_avg")
            # cost2: lambda_conf_avg * (mean confidence - 1)^2
            conf_avg1 = tf.subtract(conf_avg, tf_1f, name = "conf_avg1")
            conf_avg2 = tf.square(conf_avg1, name = "conf_avg2")
            cost2 = tf.multiply (conf_avg2, tf_lambda_conf_avg, name = "cost2")

            iconf_avg = tf.divide(tf_1f, conf_avg, name = "iconf_avg")
            nconf = tf.multiply (conf_slice, iconf_avg, name = "nconf") #normalized confidence
            nconf_pwr = tf.pow(nconf, conf_pwr, name = "nconf_pwr")
            nconf_pwr_sum = tf.reduce_sum(nconf_pwr, name = "nconf_pwr_sum")
            nconf_pwr_offs = tf.subtract(nconf_pwr_sum, tf_1f, name = "nconf_pwr_offs")
            # NOTE(review): cost3 is scaled by conf_avg2 rather than by
            # tf_lambda_conf_pwr -- looks unintended, confirm before changing.
            cost3 = tf.multiply (conf_avg2, nconf_pwr_offs, name = "cost3")
            w_all = tf.multiply (w, nconf, name = "w_all")
        else:
            w_all = w
#            cost2 = 0.0
#            cost3 = 0.0
        # normalize weights
        w_sum = tf.reduce_sum(w_all, name = "w_sum")
        iw_sum = tf.divide(tf_1f, w_sum, name = "iw_sum")
        w_norm = tf.multiply (w_all, iw_sum, name = "w_norm")

        # disp_slice = tf.slice(out_batch,[0,0],[-1,1], name = "disp_slice")
        # d_gt_slice = tf.slice(gt_ds_batch,[0,0],[-1,1], name = "d_gt_slice")
        disp_slice = tf.reshape(out_batch[:,0],[-1], name = "disp_slice")
        d_gt_slice = tf.reshape(gt_ds_batch[:,0],[-1], name = "d_gt_slice")
        if absolute_disparity:
            out_diff = tf.subtract(disp_slice, d_gt_slice, name = "out_diff")
        else:
            # The network predicts a residual relative to the target disparity.
            td_flat = tf.reshape(target_disparity_batch,[-1], name = "td_flat")
            residual_disp = tf.subtract(d_gt_slice, td_flat, name = "residual_disp")
            out_diff = tf.subtract(disp_slice, residual_disp, name = "out_diff")
        out_diff2 = tf.square(out_diff, name = "out_diff2")
        out_wdiff2 = tf.multiply (out_diff2, w_norm, name = "out_wdiff2")
        # cost1: weighted mean squared error (returned for reporting).
        cost1 = tf.reduce_sum(out_wdiff2, name = "cost1")

        # cost1b: same, but squared errors below error2_offset contribute nothing.
        out_diff2_offset = tf.subtract(out_diff2, error2_offset, name = "out_diff2_offset")
        out_diff2_biased = tf.maximum(out_diff2_offset, 0.0, name = "out_diff2_biased")
        out_diff2_wbiased = tf.multiply(out_diff2_biased, w_norm, name = "out_diff2_wbiased")
        cost1b = tf.reduce_sum(out_diff2_wbiased, name = "cost1b")

        if use_confidence:
            cost12 = tf.add(cost1b, cost2, name = "cost12")
            cost123 = tf.add(cost12, cost3, name = "cost123")
            return cost123, disp_slice, d_gt_slice, out_diff,out_diff2, w_norm, out_wdiff2, cost1
        else:
            return cost1b, disp_slice, d_gt_slice, out_diff,out_diff2, w_norm, out_wdiff2, cost1
class Rider:
    """Graph ops pinned to the rider job.

    Builds, on device "/job:rider/task:0", a scalar int32 placeholder, an
    enqueue op that pushes that placeholder onto `Horse.whip`, and an op that
    divides `Horse.steps_taken` by 2.
    """
    with tf.device("/job:rider/task:0"):
        # Scalar fed by the caller and pushed onto the horse's queue.
        flick_intensity = tf.placeholder(
            tf.int32, shape=(), name='flick_intensity')
        flick_whip = Horse.whip.enqueue(flick_intensity, 'flick_whip')
        measure_distance_covered = tf.divide(
            Horse.steps_taken, 2, name='measure_distance_covered')
def get_loss(self, raw_logits, labels, valid_mode=False):
    """Add the loss ops to the current graph.

    Besides the loss, confusion statistics (TPR, FPR, FDR, TNR, F1) are
    computed from the thresholded positive-class softmax output and written
    as scalar summaries.

    Args:
      raw_logits: A `Tensor` holding the activations from the network,
        [Batch, classes, Pos-Neg].
      labels: A `Tensor` holding the one hot encoded ground truth. # [Batch, classes]
      valid_mode: A `bool`, define the model in trainings mode or validation mode.
        Only selects the name of the enclosing variable scope.

    Returns:
      loss: A `Tensor` object holding the loss as scalar (mean weighted
        cross entropy plus L2 weight penalties).
      f1_score: A `Tensor` object holding the F1 score.
    """
    name_suffix = '_valid' if valid_mode else '_train'
    batchsize = self.FLAGS.batchsize
    nclasses = self.go_info.nclasses

    with tf.variable_scope('loss{}'.format(name_suffix)):
        softmax_logits = tf.nn.softmax(
            raw_logits, dim=2, name='logits')  # [Batch, classes, Pos-Neg]
        softmax_logits = tf.reshape(
            softmax_logits, [batchsize, nclasses, 2],
            name='softmax2predictions')
        softmax_logits = softmax_logits[:, :, 0]  # [Batch, classes, Pos]

        # positives
        positive_predictions = tf.cast(tf.greater(softmax_logits, 0.5),
                                       dtype=tf.float32)
        true_positive_predictions = tf.multiply(positive_predictions, labels)

        # negatives
        negative_predictions = tf.cast(tf.less(softmax_logits, 0.5),
                                       dtype=tf.float32)
        negative_labels = tf.cast(tf.equal(labels, 0),
                                  dtype=tf.float32)  # [Batch, classes]
        true_negative_predictions = tf.multiply(negative_predictions,
                                                negative_labels)
        # A false negative is a negative *prediction* on a positive label.
        # (Multiplying negative_labels by labels would always be zero and
        # silently drop false negatives from the F1 score.)
        false_negative_predictions = tf.multiply(negative_predictions, labels)
        false_positive_predictions = tf.multiply(positive_predictions,
                                                 negative_labels)

        # stats
        nr_pred_positives = tf.reduce_sum(positive_predictions)
        nr_true_positives = tf.reduce_sum(true_positive_predictions)
        nr_true_negatives = tf.reduce_sum(true_negative_predictions)
        nr_false_positives = tf.reduce_sum(false_positive_predictions)
        nr_false_negatives = tf.reduce_sum(false_negative_predictions)

        tpr = tf.divide(nr_true_positives, tf.reduce_sum(labels))
        fdr = tf.divide(nr_false_positives, nr_pred_positives)
        fpr = tf.divide(nr_false_positives, tf.reduce_sum(negative_labels))
        tnr = tf.divide(nr_true_negatives, tf.reduce_sum(negative_labels))

        # accuracy: F1 = 2TP / (2TP + FN + FP)
        f1_score = tf.divide(
            nr_true_positives * 2,
            tf.add(tf.add(2 * nr_true_positives, nr_false_negatives),
                   nr_false_positives))

        tf.summary.scalar('TPR', tpr)
        tf.summary.scalar('FPR', fpr)
        tf.summary.scalar('FDR', fdr)
        tf.summary.scalar('TNR', tnr)
        tf.summary.scalar('F1', f1_score)
        tf.summary.scalar('avg_pred_positives',
                          tf.divide(nr_pred_positives, batchsize))
        tf.summary.scalar('avg_true_positives',
                          tf.divide(nr_true_positives, batchsize))

        # Per-class weights: classes rarer than average get a weight > 1.
        class_sizes = np.asfarray(list(
            self.go_info.key2freq.values()))  # [classes]
        mean_class_size = np.mean(class_sizes)
        pos_weights = mean_class_size / class_sizes  # [classes]
        # config.maxClassInbalance prevents too large effective learning rates
        # (i.e. too large gradients)
        assert self.FLAGS.maxclassimbalance >= 1.0
        pos_weights = np.maximum(
            1.0, np.minimum(self.FLAGS.maxclassimbalance,
                            pos_weights))  # [classes]
        pos_weights = pos_weights.astype(np.float32)  # [classes]

        # tile the pos weights:
        pos_weights = tf.reshape(
            tf.tile(pos_weights, multiples=[batchsize]),
            shape=[batchsize, nclasses])  # [batch, classes]
        pos_weights = tf.stack([pos_weights, pos_weights],
                               axis=-1)  # [batch, classes, Pos-Neg]

        inverse_labels = tf.cast(tf.equal(labels, 0),
                                 dtype=tf.float32)  # [batch, classes]
        expanded_labels = tf.stack([labels, inverse_labels],
                                   axis=-1)  # labels, inverse labels
        expanded_labels = tf.reshape(
            expanded_labels,
            shape=[batchsize, nclasses, 2])  # [batch, classes, Pos-Neg]

        ce_loss = tf.nn.weighted_cross_entropy_with_logits(
            logits=raw_logits,
            targets=expanded_labels,
            pos_weight=pos_weights)
        ce_mean = tf.reduce_mean(ce_loss, name='celoss_mean')

        # get the l2 loss on weights of conv layers and dense layers
        # NOTE(review): if the name lookup is a substring match, 'W' would
        # also select the 'W_conv*' variables a second time -- confirm.
        l2_loss = 0
        for variable_name in ('W_conv1d', 'W_conv2d', 'W'):
            for w in tl.layers.get_variables_with_name(
                    variable_name, train_only=True, printable=False):
                l2_loss += tf.contrib.layers.l2_regularizer(1e-7)(w)

        loss = ce_mean + l2_loss
        tf.summary.scalar('loss_total', loss)
        tf.summary.scalar('loss_l2', l2_loss)
        tf.summary.scalar('loss_CE', ce_mean)

        self.logger.info("Initialized loss!")
        return loss, f1_score
def vlad_layer(self, bottom, k_center, name, trainable=False):
    """Aggregate a feature map into a VLAD-style descriptor.

    Each spatial feature is softly assigned to `k_center` clusters (softmax
    over a linear projection), the assignment-weighted residuals to each
    cluster centre are summed over all positions, and the result is
    L2-normalized per cluster and then globally.

    Args:
        bottom: 4D feature map with fully known static shape; it is reshaped
            to [B, N, D] with N = dim1 * dim2 and D = dim3.
        k_center: number of clusters.
        name: variable scope name, also forwarded to `self.get_vlad_var`.
        trainable: forwarded to `self.get_vlad_var`.

    Returns:
        Tensor of shape [B, k_center * D].
    """
    static_shape = bottom.get_shape()
    num_batches = static_shape[0].value
    num_features = static_shape[1].value * static_shape[2].value
    dim_feature = static_shape[3].value
    bottom = tf.reshape(bottom, [num_batches, num_features, dim_feature])  # B * N * D

    with tf.variable_scope(name):
        self.vlad_w, self.vlad_b, self.vlad_c = self.get_vlad_var(
            [k_center, dim_feature], [k_center, 1], [k_center, dim_feature],
            name, trainable)
        centers = self.vlad_c

        self.wt = tf.transpose(self.vlad_w, perm=[1, 0])  # D * K
        # (N * D) * (D * K), with the projection repeated for every batch item
        batched_wt = tf.tile(tf.expand_dims(self.wt, 0),
                             multiples=[num_batches, 1, 1])
        scores = tf.matmul(bottom, batched_wt)  # BNK

        # Expand the bias explicitly to B * N * K before adding it.
        tiled_bias = tf.reshape(self.vlad_b, [k_center])
        tiled_bias = tf.tile(tf.expand_dims(tiled_bias, 0), [num_features, 1])
        tiled_bias = tf.tile(tf.expand_dims(tiled_bias, 0), [num_batches, 1, 1])
        scores = tf.add(scores, tiled_bias)

        # Soft assignment: exp(score) normalized over the cluster axis.
        self.numerator = tf.exp(scores)  # B N K
        self.denominator = tf.reduce_sum(self.numerator, 2)  # B N

        per_center = []
        for k in range(k_center):
            self.alpha = tf.divide(self.numerator[:, :, k], self.denominator)  # B N
            self.alpha = tf.expand_dims(self.alpha, -1)
            residual = bottom - tf.slice(centers, [k, 0], [1, dim_feature])
            aggregated = tf.reduce_sum(tf.multiply(self.alpha, residual), axis=1)
            per_center.append(tf.expand_dims(aggregated, 1))  # B1D
        outputs = per_center[0] if len(per_center) == 1 else tf.concat(per_center, 1)

        outputs = tf.nn.l2_normalize(outputs, dim=2)  # intra-normalization B * K * D
        outputs = tf.reshape(outputs, [num_batches, -1])
        outputs = tf.nn.l2_normalize(outputs, dim=1)  # l2 normalization
        return outputs
def _get_cost(self, logits, cost_name, cost_kwargs=None):
    """
    Constructs the cost function selected by `cost_name`.

    Supported names:
      "cross_entropy":          mean softmax cross entropy, optionally
                                weighted per pixel by the class weight of its label
      "liver_dice":             negative Dice score of label > 0
      "tumor_dice":             negative Dice score of label > 1
      "avg_class_ce":           cross entropy averaged per class, then over
                                the classes present in the batch
      "avg_class_ce_binary":    as above with classes 0 and 1 merged
                                (indexes columns 0..2, so assumes n_class == 3)
      "avg_class_ce_symmetric": as "avg_class_ce" but pixels also count, with
                                factor 0.1, towards their predicted class

    Optional entries of `cost_kwargs`:
      class_weights: weights for the different classes in case of multi-class imbalance
      regularizer: power of the L2 regularizers added to the loss function

    :param logits: network output; last dimension is n_class
    :param cost_name: one of the names listed above
    :param cost_kwargs: dict of optional arguments, or None
    :return: scalar loss tensor
    :raises ValueError: if `cost_name` is not one of the supported names
    """
    if cost_kwargs is None:
        cost_kwargs = {}

    flat_logits = tf.reshape(logits, [-1, self.n_class])
    flat_labels = tf.reshape(self.y, [-1, self.n_class])

    def _class_weights(n_weights):
        # User-supplied class weights, or all ones, as a float32 constant.
        weights = cost_kwargs.get("class_weights", np.ones(n_weights))
        return tf.constant(np.array(weights, dtype=np.float32))

    def _mean_loss_per_class(loss_map, weight_map, px_per_class, n_columns):
        # Distribute the per-pixel loss over classes, average within each
        # class that actually occurs, then average over those classes.
        tiled_loss = tf.tile(tf.expand_dims(loss_map, 1), [1, n_columns])
        # both are npixel x n_columns
        weighted_loss = tf.multiply(tiled_loss, weight_map)
        loss_sum_per_class = tf.reduce_sum(weighted_loss, axis=0)
        # Classes without any pixel would divide by zero; leave them out.
        include_class = tf.not_equal(px_per_class, 0)
        loss_sum_valid = tf.boolean_mask(loss_sum_per_class, include_class)
        px_valid = tf.boolean_mask(px_per_class, include_class)
        return tf.reduce_mean(tf.divide(loss_sum_valid, px_valid))

    def _negative_dice(min_label):
        # Negative Dice overlap of the regions whose argmax label exceeds
        # min_label. NOTE(review): built from argmax/count_nonzero, so this
        # presumably provides no gradient -- confirm it is only monitored.
        eps = 1e-5
        prediction = tf.argmax(pixel_wise_softmax_2(logits), axis=3)
        gt = tf.argmax(self.y, axis=3)
        prediction_b = prediction > min_label
        gt_b = gt > min_label
        intersection = tf.to_float(tf.count_nonzero(prediction_b & gt_b))
        size_pred = tf.to_float(tf.count_nonzero(prediction_b))
        size_gt = tf.to_float(tf.count_nonzero(gt_b))
        return -(2. * intersection / (size_pred + size_gt + eps))

    if cost_name == "cross_entropy":
        loss_map = tf.nn.softmax_cross_entropy_with_logits(
            logits=flat_logits, labels=flat_labels)
        if "class_weights" in cost_kwargs:
            class_weights = tf.constant(
                np.array(cost_kwargs["class_weights"], dtype=np.float32))
            # Weight of each pixel = weight of its (one-hot) label class.
            weight_map = tf.reduce_sum(
                tf.multiply(flat_labels, class_weights), axis=1)
            loss = tf.reduce_mean(tf.multiply(loss_map, weight_map))
        else:
            loss = tf.reduce_mean(loss_map)

    # elif cost_name == "dice_coefficient":
    #     eps = 1e-5
    #     prediction = pixel_wise_softmax_2(logits)
    #     intersection = tf.reduce_sum(prediction * self.y, axis=[0, 1, 2])
    #     union = eps + tf.reduce_sum(prediction, axis=[0, 1, 2]) + tf.reduce_sum(self.y, axis=[0, 1, 2]) - intersection
    #     loss = tf.reduce_sum(-(2 * intersection / (union)))

    elif cost_name == "liver_dice":
        loss = _negative_dice(0)

    elif cost_name == "tumor_dice":
        loss = _negative_dice(1)

    elif cost_name == "avg_class_ce":
        class_weights = _class_weights(self.n_class)
        weight_map = tf.multiply(flat_labels, class_weights)
        loss_map = tf.nn.softmax_cross_entropy_with_logits(
            logits=flat_logits, labels=flat_labels)
        px_per_class = tf.reduce_sum(flat_labels, axis=0)
        loss = _mean_loss_per_class(loss_map, weight_map, px_per_class,
                                    self.n_class)

    elif cost_name == "avg_class_ce_binary":
        # Only care whether it's tumor or not
        class_weights = _class_weights(self.n_class - 1)
        combined_labels = tf.stack(
            [flat_labels[:, 0] + flat_labels[:, 1], flat_labels[:, 2]],
            axis=1)
        # log-sum-exp merges the two logits so that the softmax probability
        # of the merged class is the sum of the two original probabilities.
        combined_logits = tf.stack([
            tf.log(tf.exp(flat_logits[:, 0]) + tf.exp(flat_logits[:, 1])),
            flat_logits[:, 2]
        ], axis=1)
        weight_map = tf.multiply(combined_labels, class_weights)
        loss_map = tf.nn.softmax_cross_entropy_with_logits(
            logits=combined_logits, labels=combined_labels)
        px_per_class = tf.reduce_sum(combined_labels, axis=0)
        loss = _mean_loss_per_class(loss_map, weight_map, px_per_class,
                                    self.n_class - 1)

    elif cost_name == "avg_class_ce_symmetric":
        prediction = pixel_wise_softmax_2(logits)
        flat_prediction = tf.reshape(prediction, [-1, self.n_class])
        class_weights = _class_weights(self.n_class)
        weight_map = (tf.multiply(flat_labels, class_weights)
                      + 0.1 * tf.multiply(flat_prediction, class_weights))
        loss_map = tf.nn.softmax_cross_entropy_with_logits(
            logits=flat_logits, labels=flat_labels)
        px_per_class = (tf.reduce_sum(flat_labels, axis=0)
                        + 0.1 * tf.reduce_sum(flat_prediction, axis=0))
        loss = _mean_loss_per_class(loss_map, weight_map, px_per_class,
                                    self.n_class)

    else:
        # The message needs a placeholder; without one the % operator raises
        # TypeError and the intended ValueError is never seen.
        raise ValueError("Unknown cost function: %s" % cost_name)

    if "regularizer" in cost_kwargs:
        regularizer = cost_kwargs["regularizer"]
        regularizers = sum(
            [tf.nn.l2_loss(variable) for variable in self.variables])
        loss += (regularizer * regularizers)

    return loss
def _build_tf_graph(self):
    """Build the TensorFlow augmentation graph for image / normal-map pairs.

    Creates `self.sess`, the input placeholders `self.tfImgs` and
    `self.tfNorms`, and the outputs `self.tfOutImgs` (uint8 images) and
    `self.tfOutNorms`. The pipeline is: random rescale -> random 240x320
    crop -> random flip along axis 2 -> optional hue/saturation jitter.

    The statement order matters: a graph-level seed is set first, and the
    random ops below are created after it in a fixed sequence.
    """
    # Creating the tf session
    tf.set_random_seed(self.seed)
    self.sess = tf.Session()

    # Input placeholders
    self.tfImgs = tf.placeholder(tf.float32, shape=(None, self.height, self.width, 3))
    self.tfNorms = tf.placeholder(tf.float32, shape=(None, self.height, self.width, 3))

    # Scaling
    # Randomly chooses a scaling factor
    scales = tf.convert_to_tensor(self.scale)
    # maxval is exclusive for integer draws, so the index is 0 or 1: only the
    # first two entries of self.scale can be selected.
    rand_index = tf.random_uniform([], minval=0, maxval=2, dtype=tf.int32)
    rand_scale = scales[rand_index]
    # Scales
    size = tf.cast(
        [self.batch_height * rand_scale, self.batch_width * rand_scale],
        tf.int32)
    reszImgs = tf.image.resize_images(self.tfImgs, size)
    reszNorms = tf.image.resize_images(self.tfNorms, size)
    # Resizing interpolates the normals, so they are re-normalized along axis 3.
    normNorms = tf.nn.l2_normalize(reszNorms, 3)

    # Random Crop
    # The crop window is hard-coded to 240 x 320.
    # NOTE(review): presumably the rescaled size is always >= 240 x 320 -- confirm.
    # Random height offset
    maxHeightIndex = size[0] - 240
    heightIndex = tf.random_uniform([], minval=0, maxval=maxHeightIndex + 1, dtype=tf.int32)
    # Random width offset
    maxWidthIndex = size[1] - 320
    widthIndex = tf.random_uniform([], minval=0, maxval=maxWidthIndex + 1, dtype=tf.int32)
    # Crops (the same offsets are used for images and normals)
    cropImgs = tf.image.crop_to_bounding_box(reszImgs, heightIndex, widthIndex, 240, 320)
    cropNorms = tf.image.crop_to_bounding_box(normNorms, heightIndex, widthIndex, 240, 320)

    # Flipped variants, selected below depending on rand_flip.
    # NOTE(review): these ops are built outside the tf.cond branches; confirm
    # whether they are really skipped when rand_flip is False.
    flipImgs = tf.reverse(cropImgs, [2])
    revNorms = tf.reverse(cropNorms, [2])
    # Reversing axis 2 also requires negating the first normal component.
    flipNorms = tf.multiply([-1.0, 1.0, 1.0], revNorms)
    # Random flip
    rand_flip = tf.cast(
        tf.random_uniform([], minval=0, maxval=2, dtype=tf.int32), tf.bool)
    randFlipImgs = tf.cond(rand_flip, lambda: flipImgs, lambda: cropImgs)  # Flip or last value
    randFlipNorms = tf.cond(rand_flip, lambda: flipNorms, lambda: cropNorms)  # Flip or last value

    # Random color changes
    change_color = tf.cast(self.change_color, tf.bool)
    # Delta values
    hueDelta = tf.random_uniform([], -self.maxHueDelta, self.maxHueDelta)
    satFactor = tf.random_uniform([], 1.0 - self.maxSatDelta, 1.0 + self.maxSatDelta)
    # Convert image RGB values to [0,1] range
    rngImgs = tf.clip_by_value(tf.divide(randFlipImgs, 255.0), 0.0, 1.0)
    # Convert RGB images to HSV
    hsvImgs = tf.image.rgb_to_hsv(rngImgs)
    hue = tf.slice(hsvImgs, [0, 0, 0, 0], [-1, -1, -1, 1])
    saturation = tf.slice(hsvImgs, [0, 0, 0, 1], [-1, -1, -1, 1])
    value = tf.slice(hsvImgs, [0, 0, 0, 2], [-1, -1, -1, 1])
    # Change hue and saturation
    # Hue is cyclic: shift it and wrap back into [0, 1).
    hue = tf.cond(change_color, lambda: tf.mod(hue + (hueDelta + 1.), 1.), lambda: hue)
    saturation = tf.cond(
        change_color,
        lambda: tf.clip_by_value(saturation * satFactor, 0.0, 1.0),
        lambda: saturation)
    # Convert to RGB
    hsv = tf.concat([hue, saturation, value], 3)
    colorImgs = tf.image.hsv_to_rgb(hsv)

    # Outputs
    self.tfOutImgs = tf.image.convert_image_dtype(colorImgs, tf.uint8, saturate=True)
    self.tfOutNorms = randFlipNorms
y_conv = floatToFixPoint(y_conv) if i < 2000: result = sess.run(merged, feed_dict={ x:batch[0], y_: batch[1]}) file_writer.add_summary(result, i) #print (sess.run(tf.nn.softmax(y_conv), feed_dict={ x:batch[0], y_:batch[1]})) #print (batch[1]) if i%500 == 0: print sess.run(W_fc2) print sess.run(tf.subtract(W_init,W_fc2)) #print (sess.run(W_fc1)) #print out the deltas every 100 steps #print (sess.run(W_deltas_abs)) print ("test accuracy %g" %accuracy.eval(feed_dict={ x: mnist.test.images, y_: mnist.test.labels})) print sess.run(W_fc2) print(sess.run(save_W_init)) print(sess.run(tf.divide(W_init,2000)))
def train_op(self):
    """Build the loss, optimizer, training step and TensorBoard summaries.

    Sets `self.target`, the classification / regularization losses,
    `self.cost`, `self.train_step`, `self.learning_rate`, `self.train`,
    `self.mIoU` and `self.merged`.

    Raises:
        ValueError: if `self.optimizer` is anything other than 'adam'.
    """
    backbone_prefixes = ('res5', 'res4', 'res3')

    def _is_trained(var):
        # The text/segmentation head is always trained; the backbone stages
        # only when self.conv5 is set.
        if var.op.name.startswith('text_objseg'):
            return True
        return bool(self.conv5) and var.name.startswith(backbone_prefixes)

    tvars = [var for var in tf.trainable_variables() if _is_trained(var)]

    reg_var_list = []
    for var in tvars:
        if var.op.name.find(r'DW') > 0 or var.name[-9:-2] == 'weights':
            reg_var_list.append(var)
    print('Collecting variables for regularization:')
    for var in reg_var_list:
        print('\t%s' % var.name)
    print('Done.')

    # define loss
    self.target = tf.image.resize_bilinear(self.target_fine,
                                           [self.vf_h, self.vf_w])
    self.cls_loss_c5 = loss.weighed_logistic_loss(self.up_c5, self.target_fine, 1, 1)
    self.cls_loss_c4 = loss.weighed_logistic_loss(self.up_c4, self.target_fine, 1, 1)
    # self.cls_loss_c3 = loss.weighed_logistic_loss(self.up_c3, self.target_fine, 1, 1)
    self.cls_loss = loss.weighed_logistic_loss(self.up, self.target_fine, 1, 1)
    # The final prediction dominates; the side outputs act as auxiliary losses.
    self.cls_loss_all = (0.8 * self.cls_loss + 0.1 * self.cls_loss_c5
                         + 0.1 * self.cls_loss_c4)
    self.reg_loss = loss.l2_regularization_loss(reg_var_list, self.weight_decay)
    self.cost = self.cls_loss_all + self.reg_loss

    # learning rate
    self.train_step = tf.Variable(0, trainable=False)
    self.learning_rate = tf.train.polynomial_decay(
        self.start_lr, self.train_step, self.lr_decay_step,
        end_learning_rate=0.00001, power=0.9)

    # optimizer
    if self.optimizer != 'adam':
        raise ValueError("Unknown optimizer type %s!" % self.optimizer)
    optimizer = tf.train.AdamOptimizer(self.learning_rate)

    # learning rate multiplier: biases get twice the rate, everything else 1x
    grads_and_vars = optimizer.compute_gradients(self.cost, var_list=tvars)
    var_lr_mult = {
        var: (2.0 if var.op.name.find(r'biases') > 0 else 1.0)
        for var in tvars
    }
    print('Variable learning rate multiplication:')
    for var in tvars:
        print('\t%s: %f' % (var.name, var_lr_mult[var]))
    print('Done.')

    scaled_grads_and_vars = []
    for grad, var in grads_and_vars:
        multiplier = var_lr_mult[var]
        if multiplier != 1:
            grad = tf.multiply(multiplier, grad)
        scaled_grads_and_vars.append((grad, var))
    grads_and_vars = scaled_grads_and_vars

    # training step
    self.train = optimizer.apply_gradients(grads_and_vars,
                                           global_step=self.train_step)

    # Summary in tensorboard
    tf.summary.scalar('loss_all', self.cls_loss_all)
    # tf.summary.scalar('loss_c3', self.cls_loss_c3)
    tf.summary.scalar('loss_c4', self.cls_loss_c4)
    tf.summary.scalar('loss_c5', self.cls_loss_c5)
    tf.summary.scalar('loss_last', self.cls_loss)

    # Mean IoU of the thresholded prediction against the fine target,
    # computed per sample over axes (1, 2, 3).
    pred = tf.convert_to_tensor(tf.cast(self.up > 0, tf.int32), tf.int32)
    labl = self.target_fine
    pred_mask = tf.cast(pred, tf.bool)
    label_mask = tf.cast(labl, tf.bool)
    intersect = tf.reduce_sum(
        tf.cast(tf.logical_and(pred_mask, label_mask), tf.int32),
        axis=(1, 2, 3))
    union = tf.reduce_sum(
        tf.cast(tf.logical_or(pred_mask, label_mask), tf.int32),
        axis=(1, 2, 3))
    self.mIoU = tf.reduce_mean(tf.divide(intersect, union))
    tf.summary.scalar('mean_IOU', self.mIoU)

    self.merged = tf.summary.merge_all()
def custom_v3(is_training, images, params, mode):
    """Compute outputs of the model (embeddings for triplet loss).

    Architecture: input dropout, three blocks of
    (3x3 conv -> [batch norm] -> relu) x 2 -> 2x2 maxpool with 32 / 64 / 128
    filters, a 1x1 conv to 64 channels, 12x12 average pooling, a dense layer
    to `params.embedding_size`, L2 normalization and scaling by `params.alpha`.

    Args:
        is_training: (bool) whether we are training or not
        images: inputs of the graph (features); presumably a 4D image tensor
            with 96x96 spatial size -- confirm against the caller
        params: (Params) hyperparameters
        mode: not used by this function

    Returns:
        output: (tf.Tensor) output of the model

    Raises:
        ValueError: if `params.image_size` is not 96.
    """
    # Apply dropout to the input layer
    input_dropout = tf.layers.dropout(images,
                                      rate=params.input_dropout,
                                      training=is_training,
                                      name='input_dropout')

    image_size_in = params.image_size
    # Read as in the original implementation; not used by this architecture.
    num_filters = params.num_filters
    num_blocks = params.num_blocks
    bn_momentum = params.bn_momentum
    # each element in this list indicates the number of filters to use in a new conv block
    filters = [32, 64, 128]

    if params.image_size != 96:
        raise ValueError(
            "Image size should be equal to 96 if you want to use custom_v3.")

    def _conv_bn_relu(tensor, n_filters):
        # 3x3 conv -> optional batch norm -> relu
        tensor = tf.layers.conv2d(tensor, n_filters, 3, padding='same')
        if params.use_batch_norm:
            tensor = tf.layers.batch_normalization(tensor,
                                                   momentum=bn_momentum,
                                                   training=is_training)
        return tf.nn.relu(tensor)

    out = input_dropout
    for block_index, n_filters in enumerate(filters, start=1):
        with tf.variable_scope('block_{}'.format(block_index)):
            out = _conv_bn_relu(out, n_filters)
            out = _conv_bn_relu(out, n_filters)
            out = tf.layers.max_pooling2d(out, 2, 2)

    # 3 reductions by 2*2 maxpool
    image_size_out = int(image_size_in / (2**3))
    expected_shape = [image_size_out, image_size_out, filters[-1]]
    assert out.shape[1:] == expected_shape, \
        "filters: {}\nout shape: {}\nimage_size_out: {}".format(
            filters[-1], out.shape, image_size_out)

    # 12 x 12 x 128
    out = tf.layers.conv2d(out, 64, 1, padding='same')
    # 12 x 12 x 64
    out = tf.layers.average_pooling2d(out, 12, strides=1)
    # 1 x 1 x 64
    out = tf.reshape(out, [-1, 1 * 1 * 64])

    with tf.variable_scope('fc'):
        out = tf.layers.dense(out, params.embedding_size)

    # Project each embedding onto the unit sphere (the small constant guards
    # against a zero norm), then scale by alpha.
    row_norms = tf.norm(out, ord='euclidean', axis=1) + 1e-16
    out = tf.divide(out, tf.expand_dims(row_norms, 1))
    out = params.alpha * out

    return out
def graph(x, y, i, x_max, x_min, accum_s, accum_g):
    """Run one iteration of the scale-invariant, Adam-style adversarial update.

    The cross-entropy gradient w.r.t. `x` is summed over five scaled copies of
    the input (x, x/2, x/4, x/8, x/16), smoothed with the module-level
    `stack_kernel`, and fed into first/second moment accumulators. `x` is then
    moved by `alpha / (sqrt(s_hat) + 1e-6) * sign(g_hat)` and clipped to
    `[x_min, x_max]`.

    The return tuple mirrors the argument list, so this is presumably the body
    of a `tf.while_loop` - confirm against the caller.

    Args:
        x: current adversarial images.
        y: labels; on the first iteration (`i == 0`) they are replaced by the
            model's own predictions on `x`.
        i: iteration counter tensor.
        x_max: upper clipping bound for `x`.
        x_min: lower clipping bound for `x`.
        accum_s: running second-moment accumulator.
        accum_g: running first-moment accumulator.

    Returns:
        Tuple `(x, y, i + 1, x_max, x_min, accum_s, accum_g)` with updated
        values.
    """
    eps = 2.0 * FLAGS.max_epsilon / 255.0
    alpha = eps / FLAGS.num_iter
    num_classes = 1001
    beta_1 = FLAGS.beta_1
    beta_2 = FLAGS.beta_2

    def forward(inputs):
        # Inception-v3 forward pass; variables are shared across calls
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            return inception_v3.inception_v3(input_diversity(inputs),
                                             num_classes=num_classes,
                                             is_training=False,
                                             reuse=tf.AUTO_REUSE)

    # Full-scale pass; also provides the labels on the first iteration
    logits, end_points = forward(x)
    pred = tf.argmax(end_points['Predictions'], 1)

    first_round = tf.cast(tf.equal(i, 0), tf.int64)
    y = first_round * pred + (1 - first_round) * y
    one_hot = tf.one_hot(y, num_classes)

    grad = tf.gradients(tf.losses.softmax_cross_entropy(one_hot, logits), x)[0]

    # Scaled copies. Float literals keep the factor non-zero under Python 2,
    # where `1 / 2` would evaluate to 0.
    for divisor in (2.0, 4.0, 8.0, 16.0):
        scaled_logits, _ = forward((1.0 / divisor) * x)
        scaled_loss = tf.losses.softmax_cross_entropy(one_hot, scaled_logits)
        grad += tf.gradients(scaled_loss, x)[0]

    # Smooth the gradient with the module-level kernel
    grad = tf.nn.depthwise_conv2d(grad,
                                  stack_kernel,
                                  strides=[1, 1, 1, 1],
                                  padding='SAME')

    # Normalise by the per-example mean absolute gradient
    grad_normed = grad / tf.reduce_mean(tf.abs(grad), [1, 2, 3], keep_dims=True)

    # Exponential moving averages of the gradient and its square
    accum_g = grad_normed * (1 - beta_1) + accum_g * beta_1
    accum_s = tf.multiply(grad, grad) * (1 - beta_2) + accum_s * beta_2

    # Bias correction for the current step (i + 1)
    step = tf.cast(i + 1, tf.float32)
    accum_g_hat = tf.divide(accum_g, (1 - tf.pow(beta_1, step)))
    accum_s_hat = tf.divide(accum_s, (1 - tf.pow(beta_2, step)))

    x = x + tf.multiply(tf.divide(alpha, tf.add(tf.sqrt(accum_s_hat), 1e-6)),
                        tf.sign(accum_g_hat))
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, accum_s, accum_g
print("----------Tensorflow has been set----------")
print("\n")

# Every previously saved model is going to be tested.
saver = tf.train.Saver()
sess = tf.InteractiveSession()

# Filename queue over the validation TFRecords file.
# NOTE(review): num_epochs=None is passed here, which presumably lets the
# queue cycle without an epoch limit - confirm against the TF version in use.
filename_validation_queue = tf.train.string_input_producer(
    [tfrecords_validation_file], shuffle=shuffler, num_epochs=None)

# Get an image batch
validation_images, validation_labels = read_and_decode(
    filename_validation_queue, validation_batch, capacity)
# Keep a handle on the images before normalization
unnormalized_validation_images = validation_images

# Normalization of data: (images - mean) / standard_deviation
validation_images = tf.divide((tf.subtract(validation_images, mean)),
                              standard_deviation)

sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())

# Start the queue runners under a coordinator
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)

limit = 10000
# models=["./validation_weights/weights_iteration_890.ckpt"]
for model in models:
    # if(len(models)>1):
    #     print("we stop because there are too many models")
    #     break
    # The following list tracks recognition rate and error rate; it is used
    # to evaluate model thresholding. Each entry is a pair: the probability
    # of the majority class and the classification result (correctly
    # classified or not).
    # Example: (0.89, 1) is an image that was correctly classified (the 1)
    # with a probability of 0.89.
def normalize(tensor):
    """Min-max rescale `tensor`: subtract its global minimum, divide by its range."""
    lowest = tf.reduce_min(tensor)
    highest = tf.reduce_max(tensor)
    shifted = tf.subtract(tensor, lowest)
    value_range = tf.subtract(highest, lowest)
    return tf.divide(shifted, value_range)
def adjust_sigmoid(image, cutoff=0.5, gain=10.0, inv=False):
    '''
    Applies sigmoid correction (also known as contrast adjustment) to the
    input image.

    The image is divided by the width of its dtype's intensity range, passed
    through `1 / (1 + exp(gain * (cutoff - x)))` (or one minus that when `inv`
    is true), and multiplied back by the same width.

    Parameters:
        image: ndarray
            Input image to which sigmoid correction is to be applied.
            If the pixel dtype is floating point, values should lie between
            0 and 1.
        cutoff: float
            Adjusts the horizontal shift of the sigmoid curve.
            Defaults to 0.5.
        gain: float
            Adjusts the slope of the sigmoid curve.
            Defaults to 10.0
        inv: bool
            If True, the negative sigmoid correction is used.
            Defaults to False

    Returns:
        adjusted_image: ndarray
            The resultant image when sigmoid correction is applied to the
            input image. The computation is carried out in float32.
    '''
    # Build the ops in a private graph so that repeated calls do not keep
    # adding nodes to the caller's default graph.
    with tf.Graph().as_default():
        # Convert the image to tf constant
        input_image = tf.constant(image)

        # Width of the dtype's intensity range, used to normalize the input
        limits = input_image.dtype.limits
        normalizer = float(limits[1] - limits[0])

        # Normalize the inputs. The explicit float cast keeps this a true
        # division for integer images on every Python version.
        normalized = tf.divide(tf.cast(input_image, tf.float32), normalizer)

        # Cutoff for horizontal shift
        cut_off = tf.math.subtract(tf.constant(cutoff, tf.float32), normalized)

        # Gain for slope of sigmoid function
        gained = tf.multiply(tf.constant(gain, tf.float32), cut_off)

        # Perform 1 / (1 + exp)
        curve = tf.math.divide(1.0, tf.math.add(tf.math.exp(gained), 1.0))

        # 1 - curve if inv is set
        if inv:
            curve = tf.subtract(1.0, curve)

        # Revert the initial normalization
        output = tf.multiply(curve, normalizer)

        # No variables are created, so no initializer is needed; the `with`
        # block closes the session.
        with tf.Session() as sess:
            adjusted_image = sess.run(output)

    # Return adjusted image
    return adjusted_image
def main():
    """Create the model and generate (flow feature, confidence score) pairs.

    For each of `args.num_steps` lines of `args.data_list` (three
    space-separated fields; only the first two are used as image paths
    relative to `args.data_dir`):

    1. the second image is segmented on its own,
    2. the first image's prediction is warped onto the second via the flow
       network, and the per-quadrant agreement with the segmentation from
       step 1 is computed, ignoring pixels labelled 255,
    3. quadrants whose score exceeds `args.clip` contribute their flow
       feature and their score (times 100) to the output lists.

    The lists are written with `np.save` to `args.save_dir + "X"` and
    `args.save_dir + "Y"`.
    """
    args = get_arguments()
    print(args)

    tf.reset_default_graph()

    # Set placeholder
    image1_filename = tf.placeholder(dtype=tf.string)
    image2_filename = tf.placeholder(dtype=tf.string)
    current_output = tf.placeholder(tf.int64, [4, 512, 1024])

    # Read & Decode image
    image1 = tf.image.decode_image(tf.read_file(image1_filename), channels=3)
    image2 = tf.image.decode_image(tf.read_file(image2_filename), channels=3)
    image1.set_shape([None, None, 3])
    image2.set_shape([None, None, 3])
    image1 = tf.expand_dims(preprocess(image1), dim=0)
    image2 = tf.expand_dims(preprocess(image2), dim=0)
    image_batch = tf.image.resize_bilinear(image1 - IMG_MEAN, input_size)

    half_size = (input_size[0] // 2, input_size[1] // 2)
    current_frame = tf.image.resize_bilinear((image2) / 255.0, half_size)
    key_frame = tf.image.resize_bilinear((image1) / 255.0, half_size)

    output_size = [512, 1024]

    def quadrants(batch, split_h, split_w):
        # Stack the four quadrants (TL, TR, BL, BR) along the batch axis
        return tf.concat([
            batch[:, :split_h, :split_w, :],
            batch[:, :split_h, split_w:, :],
            batch[:, split_h:, :split_w, :],
            batch[:, split_h:, split_w:, :],
        ], 0)

    image_batch = quadrants(image_batch, 512, 1024)
    key_frame = quadrants(key_frame, 256, 512)
    current_frame = quadrants(current_frame, 256, 512)

    # Create network.
    net = DeepLab_Fast({'data': image_batch}, num_classes=NUM_CLASSES)
    flowNet = FlowNets(current_frame, key_frame)

    # Which variables to load.
    restore_var = tf.global_variables()

    # Predictions.
    raw_pred = net.layers['fc_out']
    raw_output = tf.image.resize_bilinear(raw_pred, output_size)
    raw_output = tf.argmax(raw_output, axis=3)

    flows = flowNet.inference()
    warp_pred = warp(
        tf.image.resize_bilinear(raw_pred, flows['flow'].get_shape()[1:3]),
        flows['flow'])
    scale_pred = tf.multiply(warp_pred, flows['scale'])
    wrap_output = tf.image.resize_bilinear(scale_pred, output_size)
    output = tf.argmax(wrap_output, axis=3)

    # Calculate confidence score: fraction of non-ignored (label != 255)
    # pixels where the warped prediction matches the fed segmentation.
    wight = tf.where(tf.equal(current_output, 255),
                     tf.zeros_like(current_output),
                     tf.ones_like(current_output))
    accuracy = tf.where(tf.equal(output, current_output), wight,
                        tf.zeros_like(current_output))
    average = tf.divide(tf.reduce_sum(tf.contrib.layers.flatten(accuracy), 1),
                        tf.reduce_sum(tf.contrib.layers.flatten(wight), 1))

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    score_list = []
    ft_list = []

    # The context managers release the session and the list file even if a
    # step raises.
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        # Load weights.
        ckpt = tf.train.get_checkpoint_state(args.restore_from)
        if ckpt and ckpt.model_checkpoint_path:
            loader = tf.train.Saver(var_list=restore_var)
            load(loader, sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found.')

        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        with open(args.data_list, 'r') as list_file:
            for step in range(args.num_steps):
                # Each line must hold exactly three space-separated fields;
                # the third one is not used here.
                f1, f2, _ = list_file.readline().split('\n')[0].split(' ')
                f1 = os.path.join(args.data_dir, f1)
                f2 = os.path.join(args.data_dir, f2)

                # Segment the second image on its own ...
                current_seg = sess.run(raw_output,
                                       feed_dict={image1_filename: f2})
                # ... then score the warped prediction against it.
                flow_feature, score = sess.run(
                    [flows['feature'], average],
                    feed_dict={
                        image1_filename: f1,
                        image2_filename: f2,
                        current_output: current_seg
                    })

                for i in range(4):
                    if score[i] > args.clip:
                        ft_list.append(flow_feature[i])
                        score_list.append(score[i] * 100)

                if step % 100 == 0:
                    print(step)

    # save confidence score and feature
    np.save(args.save_dir + "X", ft_list)
    np.save(args.save_dir + "Y", score_list)

    print("Generate finish!")
def log_prob(self, mean, var, x):
    """Return the element-wise Gaussian log-density of `x` for the given mean and variance.

    Computes -((x - mean)^2 / (2 * var) + log(sqrt(2 * pi * var))).
    """
    squared_error = tf.square(x - mean)
    quadratic_term = tf.divide(squared_error, 2 * var)
    log_normalizer = tf.log(tf.sqrt(2 * np.pi * var))
    return -(quadratic_term + log_normalizer)
def _miniception_module(inputs, scope, n_1x1, n_5x5_reduce, n_5x5,
                        n_3x3_reduce, n_3x3, n_pool):
    """Build one four-branch miniception module and concatenate the branches on the channel axis."""
    with tf.variable_scope(scope):
        with tf.variable_scope('branch1x1'):
            branch1x1 = tf.layers.conv2d(inputs, n_1x1, 1, padding='same',
                                         activation=tf.nn.relu)
        with tf.variable_scope('branch5x5'):
            branch5x5 = tf.layers.conv2d(inputs, n_5x5_reduce, 1,
                                         activation=tf.nn.relu)
            branch5x5 = tf.layers.conv2d(branch5x5, n_5x5, 5, padding='same',
                                         activation=tf.nn.relu)
        with tf.variable_scope('branch3x3'):
            branch3x3 = tf.layers.conv2d(inputs, n_3x3_reduce, 1,
                                         activation=tf.nn.relu)
            branch3x3 = tf.layers.conv2d(branch3x3, n_3x3, 3, padding='same',
                                         activation=tf.nn.relu)
        with tf.variable_scope('branch_pool'):
            branch_pool = tf.layers.average_pooling2d(inputs, 3, strides=1,
                                                      padding='same')
            branch_pool = tf.layers.conv2d(branch_pool, n_pool, 1,
                                           padding='same',
                                           activation=tf.nn.relu)
        return tf.concat(
            axis=3, values=[branch1x1, branch5x5, branch3x3, branch_pool])


def miniception_v6(is_training, images, params, mode):
    """Compute outputs of the model (embeddings for triplet loss).

    Adding L2-norm layer to miniception_v2 (maybe add a learnable scaling
    parameter alpha, see paper L2-constraint softmax)

    Args:
        is_training: (bool) whether we are training or not; forwarded to the
            dropout layers
        images: input batch, passed directly to `tf.layers.dropout`
            (presumably [batch, 448, 448, channels] - TODO confirm with caller)
        params: (Params) hyperparameters; this function reads `input_dropout`,
            `image_size`, `output_dropout`, `embedding_size` and `alpha`
        mode: not referenced in the body

    Returns:
        output: (tf.Tensor) embeddings of shape [batch, params.embedding_size],
            L2-normalised per row and multiplied by `params.alpha`

    Raises:
        ValueError: if `params.image_size` is not 448
    """

    def check_shape(tensor, expected):
        # Sanity check on the static shape (batch dimension excluded)
        assert tensor.shape[1:] == expected, "output has shape {}".format(
            tensor.shape)

    # Apply dropout to the input layer
    out = tf.layers.dropout(images,
                            rate=params.input_dropout,
                            training=is_training,
                            name='input_dropout')
    # 448 x 448 x num_channels

    if params.image_size != 448:
        raise ValueError(
            "Image size should be equal to 448 if you want to use miniception_v6."
        )

    out = tf.layers.conv2d(out, 16, 7, strides=2, padding='same',
                           activation=tf.nn.relu)
    check_shape(out, [224, 224, 16])
    # 224 x 224 x 16

    out = tf.layers.max_pooling2d(out, 3, strides=2, padding='same')
    check_shape(out, [112, 112, 16])
    # 112 x 112 x 16

    out = tf.layers.conv2d(out, 32, 3, strides=1, padding='same',
                           activation=tf.nn.relu)
    check_shape(out, [112, 112, 32])
    # 112 x 112 x 32

    out = tf.layers.max_pooling2d(out, 3, strides=2, padding='same')
    check_shape(out, [56, 56, 32])
    # 56 x 56 x 32

    out = tf.layers.conv2d(out, 64, 3, strides=1, padding='same',
                           activation=tf.nn.relu)
    check_shape(out, [56, 56, 64])
    # 56 x 56 x 64

    out = tf.layers.max_pooling2d(out, 3, strides=2, padding='same')
    check_shape(out, [28, 28, 64])
    # 28 x 28 x 64

    out = tf.nn.local_response_normalization(out)
    out = tf.layers.conv2d(out, 96, 3, padding='same', activation=tf.nn.relu)
    check_shape(out, [28, 28, 96])
    # 28 x 28 x 96

    out = tf.nn.local_response_normalization(out)
    out = tf.layers.max_pooling2d(out, 3, strides=2, padding='same')
    check_shape(out, [14, 14, 96])
    # 14 x 14 x 96

    # Miniception module 1
    # ------------------
    out = _miniception_module(out, 'miniception_block1',
                              n_1x1=32,
                              n_5x5_reduce=8, n_5x5=16,
                              n_3x3_reduce=48, n_3x3=64,
                              n_pool=16)
    # 14 x 14 x 128  (32 + 16 + 64 + 16 channels)

    # Transitional max pooling layer
    # ------------------------------
    out = tf.layers.max_pooling2d(out, 3, strides=2, padding='same')
    check_shape(out, [7, 7, 128])
    # 7 x 7 x 128

    # Miniception module 2
    # ------------------
    out = _miniception_module(out, 'miniception_block2',
                              n_1x1=64,
                              n_5x5_reduce=16, n_5x5=48,
                              n_3x3_reduce=64, n_3x3=96,
                              n_pool=32)
    # 7 x 7 x 240  (64 + 48 + 96 + 32 channels)
    assert out.shape[1:] == [7, 7, 240], "out shape: {}".format(out.shape)

    # Average pooling reduction
    # -------------------------
    out = tf.layers.average_pooling2d(out, 7, strides=1)
    # 1 x 1 x 240

    # Flatten layer with dropout
    # --------------------------
    out = tf.reshape(out, [-1, 1 * 1 * 240])
    out = tf.layers.dropout(out,
                            rate=params.output_dropout,
                            training=is_training,
                            name='output_dropout')

    # Final dense layer (embeddings) followed by L2 normalization
    # -----------------------------------------------------------
    with tf.variable_scope('fc'):
        out = tf.layers.dense(out, params.embedding_size)

    # Epsilon avoids division by zero for an all-zero embedding
    out = tf.divide(
        out, tf.expand_dims(tf.norm(out, ord='euclidean', axis=1) + 1e-16, 1))
    out = params.alpha * out

    return out
def build_model(self, video, video_mask, caption, caption_1, caption_mask):
    """Build the training graph: joint video/sentence encoding, VAE, and two decoders.

    Stages:
      * Encoding - video features are embedded and mean-pooled (`output1`);
        the sentence `caption_1` is run through LSTM2 (`output2`).
      * Semantic learning - `self.vae` maps the concatenated encodings to a
        latent loss and a semantic vector.
      * Tied loss - each encoding is predicted from the other through a tanh
        projection; squared errors are summed.
      * Decoding - LSTM3 reconstructs `caption` (masked cross-entropy) and
        LSTM4 reconstructs the normalised video features (masked squared
        error), both primed with the semantic vector.

    Args:
        video: video features; reshaped to [-1, self.dim_image] after being
            divided by `self.global_max_feat`.
        video_mask: per-frame mask, indexed as `video_mask[:, i]`.
        caption: word ids used as decoder targets, indexed as `caption[:, i]`.
        caption_1: word ids fed to the sentence encoder.
        caption_mask: per-word mask, indexed as `caption_mask[:, i]`.

    Returns:
        Tuple `(loss, loss_caption, loss_tied, loss_latent, loss_video,
        output_semantic, output1, output2)`. `loss` is the weighted sum of the
        four partial losses, using the module-level `caption_weight`,
        `video_weight`, `latent_weight` and `tied_weight`.
    """
    caption_mask = tf.cast(caption_mask, tf.float32)
    video_mask = tf.cast(video_mask, tf.float32)

    # for decoding
    video = tf.divide(video, self.global_max_feat)

    video_flat = tf.reshape(video, [-1, self.dim_image])  # (b x nv) x d
    image_emb = tf.nn.xw_plus_b(video_flat, self.encode_image_W,
                                self.encode_image_b)  # (b x nv) x h
    image_emb = tf.reshape(
        image_emb,
        [self.batch_size, self.n_video_steps, self.dim_hidden])  # b x nv x h

    c_init = tf.zeros([self.batch_size, self.dim_hidden])  # b x h
    m_init = tf.zeros([self.batch_size, self.dim_hidden])  # b x h
    state2 = (c_init, m_init)  # 2 x b x h

    ######## Encoding Stage #########
    # encoding video
    # mean pooling && mapping into (-1, 1) range
    output1 = tf.nn.tanh(tf.reduce_mean(image_emb, axis=1))  # b x h

    # encoding sentence
    with tf.variable_scope("model") as scope:
        for i in range(self.n_caption_steps):
            if i > 0:
                scope.reuse_variables()
            with tf.variable_scope("LSTM2"):
                with tf.device(cpu_device):
                    current_embed = tf.nn.embedding_lookup(
                        self.Wemb, caption_1[:, i])  # b x h
                output2, state2 = self.lstm2_dropout(current_embed,
                                                     state2)  # b x h
    ######## Encoding Stage #########

    ######## Semantic Learning Stage ########
    input_state = tf.concat([output1, output2], 1)  # b x (2 * h)
    loss_latent, output_semantic = self.vae(input_state)
    ######## Semantic Learning Stage ########

    ####### tied loss ##########
    sh_pred = tf.tanh(tf.nn.xw_plus_b(output1, self.sv_W, self.s_b))  # b x h
    loss_tied_1 = tf.reduce_sum(tf.square(tf.subtract(output2, sh_pred)))
    vh_pred = tf.tanh(tf.nn.xw_plus_b(output2, self.vs_W, self.v_b))  # b x h
    loss_tied_2 = tf.reduce_sum(tf.square(tf.subtract(output1, vh_pred)))
    loss_tied = loss_tied_1 + loss_tied_2
    tf.summary.scalar('loss_tied_1', loss_tied_1)
    tf.summary.scalar('loss_tied_2', loss_tied_2)
    tf.summary.histogram('vh_pred', vh_pred)
    tf.summary.histogram('sh_pred', sh_pred)
    ####### tied loss ##########

    ######## Decoding Stage ##########
    state3 = (c_init, m_init)  # 2 x b x h
    state4 = (c_init, m_init)  # 2 x b x h
    current_embed = tf.zeros([self.batch_size, self.dim_hidden])  # b x h
    video_prev = tf.zeros([self.batch_size, self.dim_hidden])

    loss_caption = 0.0
    loss_video = 0.0

    ## decoding sentence without attention
    with tf.variable_scope("model") as scope:
        # write the semantic vector into memory first
        with tf.variable_scope("LSTM3"):
            _, state3 = self.lstm3_dropout(output_semantic, state3)  # b x h
        # Same step count as the sentence encoder above, taken from the
        # instance rather than from a module-level global.
        for i in range(self.n_caption_steps):
            scope.reuse_variables()
            with tf.variable_scope("LSTM3"):
                output3, state3 = self.lstm3_dropout(current_embed,
                                                     state3)  # b x h
            labels = tf.expand_dims(caption[:, i], 1)  # b x 1
            indices = tf.expand_dims(tf.range(0, self.batch_size, 1),
                                     1)  # b x 1
            concated = tf.concat([indices, labels], 1)  # b x 2
            onehot_labels = tf.sparse_to_dense(
                concated, tf.stack([self.batch_size, self.n_words]), 1.0,
                0.0)  # b x w
            with tf.device(cpu_device):
                current_embed = tf.nn.embedding_lookup(self.Wemb,
                                                       caption[:, i])
            logit_words = tf.nn.xw_plus_b(output3, self.embed_word_W,
                                          self.embed_word_b)  # b x w
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=logit_words, labels=onehot_labels)  # b
            cross_entropy = cross_entropy * caption_mask[:, i]  # b
            loss_caption += tf.reduce_sum(cross_entropy)  # 1

    ## decoding video without attention
    with tf.variable_scope("model") as scope:
        ## TODO: add attention for video decoding
        ## write into memory first
        with tf.variable_scope("LSTM4"):
            _, state4 = self.lstm4_dropout(output_semantic, state4)
        for i in range(self.n_video_steps):
            scope.reuse_variables()
            with tf.variable_scope("LSTM4"):
                output4, state4 = self.lstm4_dropout(video_prev, state4)
            decode_image = tf.nn.xw_plus_b(output4, self.decode_image_W,
                                           self.decode_image_b)  # b x d_im
            video_prev = image_emb[:, i, :]  # b x h
            euclid_loss = tf.reduce_sum(tf.square(
                tf.subtract(decode_image, video[:, i, :])),
                                        axis=1,
                                        keep_dims=True)  # b x 1
            # Reshape the mask column to b x 1 so the product stays
            # element-wise. Multiplying a (b, 1) loss by a (b,) mask would
            # broadcast to b x b and mix the losses of different samples.
            frame_mask = tf.reshape(video_mask[:, i], [-1, 1])  # b x 1
            euclid_loss = euclid_loss * frame_mask  # b x 1
            loss_video += tf.reduce_sum(euclid_loss)  # 1

    # Average over the number of unmasked words / frames
    loss_caption = loss_caption / tf.reduce_sum(caption_mask)
    loss_video = loss_video / tf.reduce_sum(video_mask)

    loss = tf.constant(caption_weight) * loss_caption + tf.constant(video_weight) * loss_video + \
        tf.constant(latent_weight) * loss_latent + tf.constant(tied_weight) * loss_tied
    return loss, loss_caption, loss_tied, loss_latent, loss_video, output_semantic, output1, output2
def build_training_graph(network, classes, learning_rate,
                         adversary_learning_rate, adversary_threshold):
    """Assemble loss, optimiser and summary ops for the mesh network and its adversary.

    Only points whose alpha channel (`network['W']`) is positive take part.
    The mesh loss is a softmax cross-entropy weighted per point by the
    adversary's (sigmoid) output, normalised within each class and across the
    classes that are present. The adversary is regressed with MSE onto half
    the L1 distance between the truth and the softmax output, using only the
    points where its error exceeds `adversary_threshold`.

    NOTE(review): the nesting of the name scopes below was reconstructed from
    a whitespace-collapsed source - confirm summary tags against the original.

    Args:
        network: dict of tensors; reads 'Y', 'mesh', 'adversary', 'W', 'raw',
            'px' and 'n'.
        classes: sequence of class descriptors; `c[0]` is used as the class
            name.
        learning_rate: learning rate of the Adam optimiser for the mesh loss.
        adversary_learning_rate: learning rate of the gradient-descent
            optimiser for the adversary loss.
        adversary_threshold: minimum absolute adversary error for a point to
            contribute to the adversary loss.

    Returns:
        Dict with the keys 'mesh_optimiser', 'mesh_loss',
        'adversary_optimiser', 'adversary_loss', 'training_summary',
        'validation_summary', 'image_summary' and 'global_step'.
    """
    # The unscaled adversarial network output (first column only)
    adversary_raw = network['adversary'][:, 0]

    # Drop unlabelled points (non-positive alpha) from every tensor
    labelled = tf.where(tf.greater(network['W'], 0))
    truth = tf.gather_nd(network['Y'], labelled)
    logits = tf.gather_nd(network['mesh'], labelled)
    # Sigmoid as the final layer
    adversary = tf.nn.sigmoid(tf.gather_nd(adversary_raw, labelled))

    training_summary, validation_summary, image_summary = [], [], []

    def class_weights(class_index):
        # Adversary output of this class's truth points, normalised to sum to
        # one and scattered back to full length; all zeros if the class is
        # absent.
        members = tf.where(truth[:, class_index])
        share = tf.gather_nd(adversary, members)
        share = tf.divide(share, tf.reduce_sum(share))
        share = tf.scatter_nd(members, share,
                              tf.shape(adversary, out_type=tf.int64))
        return tf.cond(tf.equal(tf.size(members), 0),
                       lambda: tf.zeros_like(adversary), lambda: share)

    def count(tensor):
        # Number of non-zero entries, as float32
        return tf.cast(tf.count_nonzero(tensor), tf.float32)

    # Gather our individual output for training
    with tf.name_scope('Training'):

        # Global training step
        global_step = tf.Variable(0,
                                  dtype=tf.int32,
                                  trainable=False,
                                  name='global_step')

        # Our loss function
        with tf.name_scope('Loss'):
            # Unweighted loss, before the adversary decides which samples
            # matter more
            unweighted_mesh_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits, labels=truth, dim=1)

            # Labels for the adversary: half the L1 error of the softmax
            softmax_error = tf.abs(
                tf.subtract(truth, tf.nn.softmax(logits, dim=1)))
            adversary_target = tf.reduce_sum(softmax_error, axis=1) / 2.0

            # Only points where the adversary is off by more than the
            # threshold contribute, so many small gradients cannot drown out
            # the points with real error. The adversary therefore never
            # converges, which is intended.
            adversary_error = tf.abs(adversary_target - adversary)
            selected = tf.where(
                tf.greater(adversary_error, adversary_threshold))
            adversary_loss = tf.losses.mean_squared_error(
                predictions=tf.gather_nd(adversary, selected),
                labels=tf.stop_gradient(
                    tf.gather_nd(adversary_target, selected)),
            )

            # Per-class loss weights
            scatters = [class_weights(i) for i in range(len(classes))]

            # Even if some classes are absent, the weights should sum to 1
            nonzero_per_class = tf.stack(
                [tf.count_nonzero(s) for s in scatters])
            active_classes = tf.cast(tf.count_nonzero(nonzero_per_class),
                                     tf.float32)
            point_weights = tf.divide(tf.add_n(scatters), active_classes)

            # Sum rather than mean: already normalised by number of points
            weighted_mesh_loss = tf.reduce_sum(
                tf.multiply(unweighted_mesh_loss,
                            tf.stop_gradient(point_weights)))

            training_summary.append(
                tf.summary.scalar('Mesh Loss', weighted_mesh_loss))
            training_summary.append(
                tf.summary.scalar('Adversary Loss', adversary_loss))

        # Our optimisers
        with tf.name_scope('Optimiser'):
            mesh_optimiser = tf.train.AdamOptimizer(
                learning_rate=learning_rate).minimize(weighted_mesh_loss,
                                                      global_step=global_step)
            adversary_optimiser = tf.train.GradientDescentOptimizer(
                learning_rate=adversary_learning_rate).minimize(adversary_loss)

    # Calculate accuracy
    with tf.name_scope('Validation'):
        probabilities = tf.nn.softmax(logits)

        for i, c in enumerate(classes):
            name = c[0]
            with tf.name_scope(name.title()):
                members = tf.where(truth[:, i])

                # One-vs-rest indicators: 1 for this class, 0 otherwise
                predictions = tf.cast(
                    tf.equal(tf.argmax(probabilities, axis=1), i), tf.int32)
                labels = tf.cast(tf.equal(tf.argmax(truth, axis=1), i),
                                 tf.int32)

                weights = tf.gather_nd(point_weights, members)
                unweighted = tf.gather_nd(unweighted_mesh_loss, members)

                # Confusion matrix entries
                tp = count(predictions * labels)
                tn = count((predictions - 1) * (labels - 1))
                fp = count(predictions * (labels - 1))
                fn = count((predictions - 1) * labels)

                validation_summary.extend([
                    tf.summary.scalar(
                        'Loss',
                        tf.reduce_sum(tf.multiply(unweighted, weights))),
                    tf.summary.scalar('Precision', tp / (tp + fp)),
                    tf.summary.scalar('Recall', tp / (tp + fn)),
                    tf.summary.scalar('Accuracy',
                                      (tp + tn) / (tp + fp + tn + fn)),
                ])

        with tf.name_scope('Global'):
            # Monitor loss and metrics
            validation_summary.append(
                tf.summary.scalar('Mesh Loss', weighted_mesh_loss))
            validation_summary.append(
                tf.summary.scalar('Adversary Loss', adversary_loss))

    with tf.name_scope('Mesh'):
        mesh_drawer = MeshDrawer(classes)
        mesh_image = tf.py_func(mesh_drawer.mesh_image, [
            network['raw'], network['px'], network['n'],
            tf.nn.softmax(network['mesh'])
        ], tf.uint8, False)
        image_summary.append(
            tf.summary.image(
                'Mesh',
                mesh_image,
                # Doesn't matter as we limit it at the dataset/batch level
                max_outputs=10000,
            ))

    with tf.name_scope('Adversary'):
        adversary_image = tf.py_func(
            mesh_drawer.adversary_image,
            [
                network['raw'], network['px'], network['n'],
                tf.nn.sigmoid(network['adversary'][:, 0])
            ],
            tf.uint8,
            False,
        )
        image_summary.append(
            tf.summary.image(
                'Adversary',
                adversary_image,
                # Doesn't matter as we limit it at the dataset/batch level
                max_outputs=10000,
            ))

    for v in tf.trainable_variables():
        validation_summary.append(tf.summary.histogram(v.name, v))

    # Merge all summaries into a single op per group
    return {
        'mesh_optimiser': mesh_optimiser,
        'mesh_loss': weighted_mesh_loss,
        'adversary_optimiser': adversary_optimiser,
        'adversary_loss': adversary_loss,
        'training_summary': tf.summary.merge(training_summary),
        'validation_summary': tf.summary.merge(validation_summary),
        'image_summary': tf.summary.merge(image_summary),
        'global_step': global_step,
    }
def normalize_rewards(R):
    """Standardise rewards along axis 0 to zero mean and unit standard deviation.

    `tf.nn.moments` returns the variance, so its square root is taken before
    dividing. Dividing by the variance itself would not give unit scale. A
    small epsilon keeps the result finite when all rewards are equal (zero
    variance).

    Args:
        R: reward tensor; statistics are computed over axis 0.

    Returns:
        Tensor of the same shape as `R` holding `(R - mean) / (std + 1e-8)`.
    """
    with tf.name_scope("normalize_rewards"):
        mean, var = tf.nn.moments(R, axes=[0], name='MeanVar')
        centered = tf.subtract(R, mean)
        std = tf.sqrt(var)
        return tf.divide(centered, std + 1e-8)
def get_loss(net_output, labels, alpha, margin, allow_full_loss):
    """Combine the similarity-matrix, semantic-segmentation and confidence losses.

    input:
        net_output:{'semseg', 'semseg_logits','simmat','conf','conf_logits'}
            (this function reads 'simmat', 'conf' and 'semseg_logits')
        labels:{'ptsgroup', 'semseg','semseg_mask','group_mask'}
        alpha: weight of the hinge term for pairs in different groups but the
            same semantic category
        margin: pair (same-category margin, different-category margin)
        allow_full_loss: multiplier applied to the segmentation and
            confidence losses

    output:
        (loss, mean group error,
         group error summed over different-group/same-category pairs,
         number of such pairs,
         group error summed over different-group/different-category pairs,
         number of such pairs,
         group error summed over same-group pairs, number of such pairs)
    """
    group_onehot = tf.cast(labels['ptsgroup'], tf.float32)
    semseg_onehot = tf.cast(labels['semseg'], tf.float32)
    group_mask = tf.expand_dims(labels['group_mask'], dim=2)

    pred_conf = net_output['conf']
    pred_simmat = net_output['simmat']

    # ---- Similarity Matrix loss ----
    batch = group_onehot.get_shape()[0]
    num_points = group_onehot.get_shape()[1]
    unit_diag = tf.ones([batch, num_points], tf.float32)

    def pairwise_same(onehot):
        # BxNxN: (i, j) is non-zero if i and j share a label; diagonal forced to 1
        co_occurrence = tf.matmul(onehot, tf.transpose(onehot, perm=[0, 2, 1]))
        return tf.matrix_set_diag(co_occurrence, unit_diag)

    same_group = pairwise_same(group_onehot)
    same_sem = pairwise_same(semseg_onehot)

    diff_sem = 1.0 - same_sem
    diff_group = 1.0 - same_group
    diff_group_same_sem = diff_group * same_sem
    diff_group_diff_sem = diff_group * diff_sem

    num_samegroup = tf.reduce_sum(same_group)
    num_diffgroup_samesem = tf.reduce_sum(diff_group_same_sem)
    num_diffgroup_diffsem = tf.reduce_sum(diff_group_diff_sem)

    # Double hinge loss
    C_same = tf.constant(margin[0], name="C_same")  # same semantic category
    C_diff = tf.constant(margin[1], name="C_diff")  # different semantic category

    # minimize distances if in the same group
    pull_term = tf.multiply(same_group, pred_simmat)
    push_same_sem = alpha * tf.multiply(
        diff_group_same_sem, tf.maximum(tf.subtract(C_same, pred_simmat), 0))
    push_diff_sem = tf.multiply(
        diff_group_diff_sem, tf.maximum(tf.subtract(C_diff, pred_simmat), 0))

    pair_loss = push_same_sem + push_diff_sem + pull_term
    pair_weight = tf.matmul(group_mask,
                            tf.transpose(group_mask, perm=[0, 2, 1]))
    pair_loss = tf.multiply(pair_loss, pair_weight)

    # Masked mean per batch element, then mean over the batch
    per_batch_pair_loss = tf.divide(
        tf.reduce_sum(pair_loss, axis=[1, 2]),
        tf.maximum(1e-6, tf.reduce_sum(pair_weight, axis=[1, 2])))
    simmat_loss = tf.reduce_mean(per_batch_pair_loss)

    # ---- Semantic Segmentation loss ----
    semseg_mask = labels['semseg_mask']
    point_ce = tf.nn.softmax_cross_entropy_with_logits(
        logits=net_output['semseg_logits'], labels=semseg_onehot)
    point_ce = tf.multiply(point_ce, semseg_mask)
    per_batch_ce = tf.divide(
        tf.reduce_sum(point_ce, axis=-1),
        tf.maximum(1e-6, tf.reduce_sum(semseg_mask, axis=-1)))
    ptsseg_loss = tf.reduce_mean(per_batch_ce)

    # ---- Confidence Map loss ----
    objectness = tf.reduce_sum(semseg_onehot, axis=-1)
    truth_same_group = tf.greater(same_group, tf.constant(0.5))
    pred_same_group = tf.less(pred_simmat, tf.constant(margin[0]))

    epsilon = tf.constant(
        np.ones(truth_same_group.get_shape()[:2]).astype(np.float32) * 1e-6)
    intersection = tf.reduce_sum(
        tf.cast(tf.logical_and(pred_same_group, truth_same_group), tf.float32),
        axis=2)
    union = tf.reduce_sum(
        tf.cast(tf.logical_or(pred_same_group, truth_same_group), tf.float32),
        axis=2)
    pts_iou = tf.div(intersection, (union + epsilon))
    confidence_label = tf.multiply(pts_iou, objectness)  # BxN

    confidence_loss = tf.reduce_mean(
        tf.squared_difference(confidence_label, tf.squeeze(pred_conf, [2])))

    loss = simmat_loss + allow_full_loss * ptsseg_loss + allow_full_loss * confidence_loss

    # 1 where predicted and true grouping disagree, 0 elsewhere
    grouperr = tf.abs(
        tf.cast(pred_same_group, tf.float32) -
        tf.cast(truth_same_group, tf.float32))

    return (loss,
            tf.reduce_mean(grouperr),
            tf.reduce_sum(grouperr * diff_group_same_sem),
            num_diffgroup_samesem,
            tf.reduce_sum(grouperr * diff_group_diff_sem),
            num_diffgroup_diffsem,
            tf.reduce_sum(grouperr * same_group),
            num_samegroup)
def build_model(self):
    """Create placeholders, generator/discriminator graphs, losses and summaries.

    Sets on `self`: the input placeholders (`images`, `sample_images`, `z`,
    `t`, `t_wr`, `mask`), the generator output `G`, discriminator outputs for
    real / fake / wrong-text pairs, the cross-entropy losses (`g_loss`,
    `d_loss_real`, `d_loss_fake`, `d_loss_wrong`, `d_loss`), their summaries,
    the variable lists `d_vars` / `g_vars`, a `Saver`, and the completion
    objective (`contextual_loss`, `perceptual_loss`, `complete_loss`,
    `grad_complete_loss`).

    The TF 1.x summary API (`tf.summary.*`) and keyword arguments for
    `sigmoid_cross_entropy_with_logits` are used throughout. The positional
    form and the `tf.*_summary` functions are not accepted by TF 1.x.
    """
    batch_image_shape = [self.batch_size] + self.image_shape
    self.images = tf.placeholder(tf.float32, batch_image_shape,
                                 name='real_images')
    self.sample_images = tf.placeholder(tf.float32, batch_image_shape,
                                        name='sample_images')
    self.z = tf.placeholder(tf.float32, [self.batch_size, self.z_dim],
                            name='z')
    self.z_sum = tf.summary.histogram("z", self.z)
    self.t = tf.placeholder(tf.float32,
                            [self.batch_size, self.text_vector_dim],
                            name='t')
    self.t_sum = tf.summary.histogram("t", self.t)
    self.t_wr = tf.placeholder(tf.float32,
                               [self.batch_size, self.text_vector_dim],
                               name='t_wr')
    self.t_wr_sum = tf.summary.histogram("t_wr", self.t_wr)

    self.G = self.generator(self.z, self.t)
    # real image + matching text, generated image + matching text,
    # real image + wrong text
    self.D_rl, self.D_logits_rl = self.discriminator(self.images, self.t)
    self.D_fk, self.D_logits_fk = self.discriminator(self.G, self.t,
                                                     reuse=True)
    self.D_wr, self.D_logits_wr = self.discriminator(self.images, self.t_wr,
                                                     reuse=True)

    # NOTE: this rebinds the `sampler` attribute from the method to its
    # result, so build_model can only be called once per instance.
    self.sampler = self.sampler(self.z, self.t)

    self.G_sum = tf.summary.image("G", self.G)
    self.d_rl_sum = tf.summary.histogram("d", self.D_rl)
    self.d_fk_sum = tf.summary.histogram("d_", self.D_fk)
    self.d_wr_sum = tf.summary.histogram("d_wr", self.D_wr)

    def mean_sigmoid_ce(logits, labels):
        # Mean sigmoid cross-entropy of `logits` against constant `labels`
        return tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                    labels=labels))

    # cross entropy loss
    self.g_loss = mean_sigmoid_ce(self.D_logits_fk, tf.ones_like(self.D_fk))
    self.d_loss_real = mean_sigmoid_ce(self.D_logits_rl,
                                       tf.ones_like(self.D_rl))
    self.d_loss_fake = mean_sigmoid_ce(self.D_logits_fk,
                                       tf.zeros_like(self.D_fk))
    self.d_loss_wrong = mean_sigmoid_ce(self.D_logits_wr,
                                        tf.zeros_like(self.D_wr))

    # Alternative (disabled): least square loss
    #   d_loss_real  = 0.5 * mean((D_logits_rl - 1)^2)
    #   d_loss_fake  = 0.5 * mean((D_logits_fk - 0)^2)
    #   d_loss_wrong = 0.5 * mean((D_logits_wr - 0)^2)
    #   g_loss       = 0.5 * mean((D_logits_fk - 1)^2)

    self.d_loss_real_sum = tf.summary.scalar("d_loss_real", self.d_loss_real)
    self.d_loss_fake_sum = tf.summary.scalar("d_loss_fake", self.d_loss_fake)
    self.d_loss_wrong_sum = tf.summary.scalar("d_loss_wrong",
                                              self.d_loss_wrong)

    self.d_loss = self.d_loss_real + self.d_loss_fake + self.lam3 * self.d_loss_wrong

    self.g_loss_sum = tf.summary.scalar("g_loss", self.g_loss)
    self.d_loss_sum = tf.summary.scalar("d_loss", self.d_loss)

    # Variables are split between the two networks by name substring
    t_vars = tf.trainable_variables()
    self.d_vars = [var for var in t_vars if 'd_' in var.name]
    self.g_vars = [var for var in t_vars if 'g_' in var.name]

    self.saver = tf.train.Saver(max_to_keep=50)

    # mask to generate
    self.mask = tf.placeholder(tf.float32, [None] + self.image_shape,
                               name='mask')

    def flat_gray(batch):
        # Crop to the nominal image size, convert to grayscale, flatten, and
        # map values through (v + 1) / 2 (presumably [-1, 1] -> [0, 1];
        # confirm against the generator's output range).
        cropped = tf.slice(batch, [0, 0, 0, 0], [
            self.batch_size, self.image_size, self.image_size, self.c_dim
        ])
        gray = tf.contrib.layers.flatten(tf.image.rgb_to_grayscale(cropped))
        return tf.divide(tf.add(gray, 1), 2)

    # kl divergence between generated and real images
    self.contextual_loss = kl_divergence(flat_gray(self.G),
                                         flat_gray(self.images))
    self.perceptual_loss = self.g_loss
    self.complete_loss = self.lam1 * self.contextual_loss + self.lam2 * self.perceptual_loss
    self.grad_complete_loss = tf.gradients(self.complete_loss, self.z)
def discriminative_loss_single(prediction, correct_label, feature_dim,
                               label_shape, delta_v, delta_d, param_var,
                               param_dist, param_reg):
    ''' Discriminative loss for a single prediction/label pair.
    :param prediction: inference of network
    :param correct_label: instance label
    :param feature_dim: feature dimension of prediction
    :param label_shape: shape of label
    :param delta_v: cutoff variance distance
    :param delta_d: cutoff cluster distance
    :param param_var: weight for intra cluster variance
    :param param_dist: weight for inter cluster distances
    :param param_reg: weight regularization
    :return: tuple ``(loss, l_var, l_dist, l_reg)``; the three terms are
        already multiplied by their weights and ``loss`` is their sum
    '''
    num_pixels = label_shape[1] * label_shape[0]

    ### Reshape so pixels are aligned along a vector
    correct_label = tf.reshape(correct_label, [num_pixels])
    reshaped_pred = tf.reshape(prediction, [num_pixels, feature_dim])

    ### Count instances
    unique_labels, unique_id, counts = tf.unique_with_counts(correct_label)
    counts = tf.cast(counts, tf.float32)
    num_instances = tf.size(unique_labels)

    # Per-instance mean embedding, then broadcast back to every pixel.
    segmented_sum = tf.unsorted_segment_sum(
        reshaped_pred, unique_id, num_instances)
    mu = tf.div(segmented_sum, tf.reshape(counts, (-1, 1)))
    mu_expand = tf.gather(mu, unique_id)

    ### Calculate l_var
    distance = tf_norm(tf.subtract(mu_expand, reshaped_pred), axis=1)
    distance = tf.subtract(distance, delta_v)
    # Clipping with the tensor itself as the upper bound keeps only the
    # positive part (hinge).
    distance = tf.clip_by_value(distance, 0., distance)
    distance = tf.square(distance)

    l_var = tf.unsorted_segment_sum(distance, unique_id, num_instances)
    l_var = tf.div(l_var, counts)
    l_var = tf.reduce_sum(l_var)
    l_var = tf.divide(l_var, tf.cast(num_instances, tf.float32))

    ### Calculate l_dist

    # Get distance for each pair of clusters like this:
    #   mu_1 - mu_1
    #   mu_2 - mu_1
    #   mu_3 - mu_1
    #   mu_1 - mu_2
    #   mu_2 - mu_2
    #   mu_3 - mu_2
    #   mu_1 - mu_3
    #   mu_2 - mu_3
    #   mu_3 - mu_3
    mu_interleaved_rep = tf.tile(mu, [num_instances, 1])
    mu_band_rep = tf.tile(mu, [1, num_instances])
    mu_band_rep = tf.reshape(
        mu_band_rep, (num_instances * num_instances, feature_dim))

    mu_diff = tf.subtract(mu_band_rep, mu_interleaved_rep)

    # Filter out zeros from same cluster subtraction
    intermediate_tensor = tf.reduce_sum(tf.abs(mu_diff), axis=1)
    zero_vector = tf.zeros(1, dtype=tf.float32)
    bool_mask = tf.not_equal(intermediate_tensor, zero_vector)
    mu_diff_bool = tf.boolean_mask(mu_diff, bool_mask)

    mu_norm = tf_norm(mu_diff_bool, axis=1)
    mu_norm = tf.subtract(2. * delta_d, mu_norm)
    mu_norm = tf.clip_by_value(mu_norm, 0., mu_norm)
    mu_norm = tf.square(mu_norm)

    # With a single instance (or when every pair is filtered out above) there
    # is nothing to average; reduce_mean of an empty tensor is NaN and would
    # poison the total loss, so the distance term is defined as 0 instead.
    l_dist = tf.cond(
        tf.equal(tf.size(mu_norm), 0),
        lambda: tf.zeros([], dtype=mu_norm.dtype),
        lambda: tf.reduce_mean(mu_norm))

    ### Calculate l_reg
    l_reg = tf.reduce_mean(tf_norm(mu, axis=1))

    param_scale = 1.
    l_var = param_var * l_var
    l_dist = param_dist * l_dist
    l_reg = param_reg * l_reg

    loss = param_scale * (l_var + l_dist + l_reg)

    return loss, l_var, l_dist, l_reg
def rasterize_triangles(vertices, attributes, triangles, projection_matrices,
                        image_width, image_height, background_value):
    """Rasterizes a batch of meshes and interpolates per-vertex attributes.

    NOTE: no triangle clipping is performed. Triangles outside the viewing
    frustum (in particular behind the camera) may be drawn incorrectly.

    Args:
      vertices: 3-D float32 tensor, shape [batch_size, vertex_count, 3]; xyz
        positions in model space.
      attributes: 3-D float32 tensor, shape [batch_size, vertex_count,
        attribute_count]; interpolated across each triangle with barycentric
        interpolation.
      triangles: 2-D int32 tensor, shape [triangle_count, 3]; vertex indices
        of each triangle. The normal points toward the viewer when the
        triplet order defines a clockwise winding. No gradients are available
        with respect to this tensor.
      projection_matrices: 3-D float tensor, shape [batch_size, 4, 4];
        model-view-perspective projection matrices.
      image_width: int, output image width in pixels.
      image_height: int, output image height in pixels.
      background_value: 1-D float32 tensor, shape [attribute_count]; value
        given to pixels not covered by any triangle.

    Returns:
      A 4-D float32 tensor of shape [batch_size, image_height, image_width,
      attribute_count] holding the interpolated attributes at each pixel.

    Raises:
      ValueError: if a dimension is not positive or `vertices` is not 3-D.
    """
    if not image_width > 0:
        raise ValueError('Image width must be > 0.')
    if not image_height > 0:
        raise ValueError('Image height must be > 0.')
    if len(vertices.shape) != 3:
        raise ValueError('The vertex buffer must be 3D.')

    batch_size = vertices.shape[0].value
    vertex_count = vertices.shape[1].value

    # Project to normalized device coordinates in TensorFlow so that as much
    # of the pipeline as possible stays outside the rendering kernel.
    ones_column = tf.ones([batch_size, vertex_count, 1], dtype=tf.float32)
    homogeneous_vertices = tf.concat([vertices, ones_column], 2)

    # Row-major vertices against a column-major pipeline: multiply by the
    # transposed projection matrix.
    clip_points = tf.matmul(
        homogeneous_vertices, projection_matrices, transpose_b=True)

    # Keep |w| above a threshold (sign preserved) so the perspective divide
    # cannot produce NaNs.
    raw_w = clip_points[:, :, 3:4]
    clip_w = tf.maximum(
        tf.abs(raw_w), _MINIMUM_PERSPECTIVE_DIVIDE_THRESHOLD) * tf.sign(raw_w)
    ndc = clip_points[:, :, 0:3] / clip_w

    barycentric_per_image = []
    vertex_ids_per_image = []
    for image_index in range(batch_size):
        barycentric, triangle_ids, _ = (
            rasterize_triangles_module.rasterize_triangles(
                ndc[image_index, :, :], triangles, image_width, image_height))
        barycentric_per_image.append(tf.reshape(barycentric, [-1, 3]))

        # Triangle ids carry no batch identifier, so resolve them to vertex
        # ids here and offset those into the flattened (batch, vertex) space.
        corner_ids = tf.gather(triangles, tf.reshape(triangle_ids, [-1]))
        vertex_ids_per_image.append(
            tf.add(corner_ids, image_index * vertex_count))

    raw_barycentric = tf.concat(barycentric_per_image, axis=0)
    vertex_ids = tf.concat(vertex_ids_per_image, axis=0)

    # Look up the attributes of each pixel's three triangle corners for
    # deferred shading.
    flat_attributes = tf.reshape(attributes, [batch_size * vertex_count, -1])
    corner_attributes = tf.gather(flat_attributes, vertex_ids)

    # Barycentric interpolation is linear in 1/w, so the reciprocal of the
    # homogeneous coordinate at each corner is the perspective-correction
    # weight.
    inverse_w = tf.reciprocal(tf.reshape(clip_w, [-1]))
    corner_inverse_w = tf.gather(inverse_w, vertex_ids)

    # The rasterizer only sees post-divide coordinates, so the perspective
    # correction of the barycentric coordinates has to happen here.
    weighted_barycentric = tf.multiply(raw_barycentric, corner_inverse_w)
    weight_sum = tf.reduce_sum(weighted_barycentric, axis=1)
    safe_weight_sum = tf.expand_dims(
        tf.maximum(weight_sum, _MINIMUM_REWEIGHTING_THRESHOLD), axis=1)
    corrected_barycentric = tf.divide(weighted_barycentric, safe_weight_sum)

    # Blend the three corner attributes with the corrected coordinates.
    blended = tf.multiply(
        corner_attributes, tf.expand_dims(corrected_barycentric, axis=2))
    pixel_attributes = tf.reduce_sum(blended, axis=1)
    attribute_images = tf.reshape(
        pixel_attributes, [batch_size, image_height, image_width, -1])

    # Barycentric coordinates should approximately sum to one where there is
    # rendered geometry, but be exactly zero where there is not.
    alphas = tf.clip_by_value(
        tf.reduce_sum(2.0 * corrected_barycentric, axis=1), 0.0, 1.0)
    alphas = tf.reshape(alphas, [batch_size, image_height, image_width, 1])

    return alphas * attribute_images + (1.0 - alphas) * background_value
def transformer(self):
    """Build the causally-masked self-attention encoder.

    The first input step is the length-normalised sum of the precursor
    embeddings; it is prepended to ``self.output_embedding`` and the last
    step is dropped. The sequence then goes through ``self.n_layer`` blocks
    of multi-head self-attention and a GELU feed-forward layer, each followed
    by dropout, a residual connection and layer normalisation.

    The result is stored in ``self.transformer_out``; nothing is returned.

    NOTE(review): the variable-scope nesting below was reconstructed from
    whitespace-collapsed source (attention/output nested inside "attention",
    "intermediate" and "output" directly under the layer scope) -- confirm
    against existing checkpoints.

    Raises:
        ValueError: if ``self.hidden_dim`` is not a multiple of
            ``self.num_attention_heads``.
    """

    def transpose_for_scores(input_tensor, batch_size, num_attention_heads,
                             seq_length, width):
        """Reshape to [B, S, N, H] and transpose to [B, N, S, H]."""
        output_tensor = tf.reshape(
            input_tensor,
            [batch_size, seq_length, num_attention_heads, width])
        return tf.transpose(output_tensor, [0, 2, 1, 3])

    def get_shape_list(tensor):
        """Return the shape of `tensor` as a list, preferring static dims.

        Static dimensions are returned as python integers, dynamic ones as
        scalar tf.Tensor objects.
        """
        shape = tensor.shape.as_list()
        non_static_indexes = [
            index for index, dim in enumerate(shape) if dim is None]
        if not non_static_indexes:
            return shape
        dyn_shape = tf.shape(tensor)
        for index in non_static_indexes:
            shape[index] = dyn_shape[index]
        return shape

    def create_initializer(initializer_range=0.02):
        """Creates a `truncated_normal_initializer` with the given range."""
        return tf.truncated_normal_initializer(stddev=initializer_range)

    def gelu(x):
        """Gaussian Error Linear Unit (tanh approximation).

        Original paper: https://arxiv.org/abs/1606.08415
        """
        cdf = 0.5 * (1.0 + tf.tanh(
            (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
        return x * cdf

    def reshape_from_matrix(output_tensor, orig_shape_list):
        """Reshapes a rank 2 tensor back to its original rank >= 2 tensor."""
        if len(orig_shape_list) == 2:
            return output_tensor
        width = get_shape_list(output_tensor)[-1]
        return tf.reshape(output_tensor, orig_shape_list[0:-1] + [width])

    with tf.variable_scope("encoder"):
        # [batch_size, pre_length, hidden_dim]
        precursor = tf.nn.embedding_lookup(
            params=self.music_embedding_all,
            ids=self.precursor,
            name="precursor")
        # [batch_size, pre_length, 1]
        tran_mask = tf.to_float(
            tf.expand_dims(tf.sequence_mask(self.pre_length), -1))
        # [batch_size, hidden_dim]: masked sum divided by the precursor length
        tran_first = tf.divide(
            tf.reduce_sum(precursor * tran_mask, 1),
            tf.reshape(tf.to_float(self.pre_length), [-1, 1]))
        # Prepend the precursor summary and drop the last step.
        tran_inputs = tf.concat(
            [tf.expand_dims(tran_first, 1), self.output_embedding],
            1)[:, :-1]

        if self.hidden_dim % self.num_attention_heads != 0:
            raise ValueError(
                "The hidden size (%d) is not a multiple of the number of attention "
                "heads (%d)" % (self.hidden_dim, self.num_attention_heads))
        attention_head_size = self.hidden_dim // self.num_attention_heads
        projection_width = self.num_attention_heads * attention_head_size

        # Loop-invariant: the input shape does not change between layers.
        input_shape = get_shape_list(tran_inputs)
        batch_size = input_shape[0]
        seq_len = self.sequence_length

        # Lower-triangular (causal) mask, shape [1, 1, F, F], shared by all
        # layers.
        mask_tri = tf.reshape(
            tf.matrix_band_part(tf.ones([seq_len, seq_len]), -1, 0),
            [1, 1, seq_len, seq_len])

        prev_output = self.reshape_to_matrix(tran_inputs)

        for layer_idx in range(self.n_layer):
            with tf.variable_scope("layer_%d" % layer_idx):
                layer_input = prev_output

                with tf.variable_scope("attention"):
                    from_tensor_2d = self.reshape_to_matrix(layer_input)

                    # `query_layer` = [B*F, N*H]
                    query_layer = tf.layers.dense(
                        from_tensor_2d,
                        projection_width,
                        activation=None,
                        name="query",
                        kernel_initializer=create_initializer())
                    # `key_layer` = [B*F, N*H]
                    key_layer = tf.layers.dense(
                        from_tensor_2d,
                        projection_width,
                        activation=None,
                        name="key",
                        kernel_initializer=create_initializer())
                    # `value_layer` = [B*F, N*H]
                    value_layer = tf.layers.dense(
                        from_tensor_2d,
                        projection_width,
                        activation=None,
                        name="value",
                        kernel_initializer=create_initializer())

                    # `key_layer` = [B, N, F, H]
                    key_layer = transpose_for_scores(
                        key_layer, batch_size, self.num_attention_heads,
                        seq_len, attention_head_size)
                    # `query_layer` = [B, N, F, H]
                    query_layer = transpose_for_scores(
                        query_layer, batch_size, self.num_attention_heads,
                        seq_len, attention_head_size)

                    # `attention_scores` = [B, N, F, F]
                    attention_scores = tf.matmul(
                        query_layer, key_layer, transpose_b=True)
                    attention_scores = tf.multiply(
                        attention_scores,
                        1.0 / math.sqrt(float(attention_head_size)))

                    # Masked-out positions are replaced by a large negative
                    # score so they vanish after the softmax.
                    attention_scores = (attention_scores * mask_tri
                                        + -1e9 * (1 - mask_tri))
                    attention_probs = tf.nn.softmax(attention_scores)
                    # NOTE(review): the second positional argument of
                    # tf.nn.dropout is `keep_prob` in TF 1.x -- confirm that
                    # self.dropout holds a keep probability.
                    attention_probs = tf.nn.dropout(
                        attention_probs, self.dropout)

                    # `value_layer` = [B, N, F, H]
                    value_layer = transpose_for_scores(
                        value_layer, batch_size, self.num_attention_heads,
                        seq_len, attention_head_size)

                    # `context_layer` = [B, N, F, H]
                    context_layer = tf.matmul(attention_probs, value_layer)
                    # `context_layer` = [B, F, N, H]
                    context_layer = tf.transpose(context_layer, [0, 2, 1, 3])
                    # [B*F, N*H]
                    attention_output = tf.reshape(
                        context_layer,
                        [batch_size * seq_len, projection_width])

                    with tf.variable_scope("output"):
                        attention_output = tf.layers.dense(
                            attention_output,
                            self.hidden_dim,
                            kernel_initializer=create_initializer())
                        attention_output = tf.nn.dropout(
                            attention_output, self.dropout)
                        attention_output = tf.contrib.layers.layer_norm(
                            inputs=attention_output + layer_input,
                            begin_norm_axis=-1,
                            begin_params_axis=-1)

                with tf.variable_scope("intermediate"):
                    intermediate_output = tf.layers.dense(
                        attention_output,
                        self.intermediate_size,
                        activation=gelu,
                        kernel_initializer=create_initializer())

                with tf.variable_scope("output"):
                    layer_output = tf.layers.dense(
                        intermediate_output,
                        self.hidden_dim,
                        kernel_initializer=create_initializer())
                    layer_output = tf.nn.dropout(layer_output, self.dropout)
                    layer_output = tf.contrib.layers.layer_norm(
                        layer_output + attention_output)
                    prev_output = layer_output

        # [batch_size, sequence_length, hidden_dim]
        self.transformer_out = reshape_from_matrix(prev_output, input_shape)
import tensorflow as tf
import numpy as np

# Demo: fraction of looked-up embedding rows whose element sum is non-zero.
# Row 0 of the table is all zeros, so looking up id 0 counts as a miss.
embed_table = tf.constant(
    [[0, 0, 0, 0, 0],
     [0.11, 0.12, 0.13, 0.14, 0.15],
     [0.21, 0.22, 0.23, 0.24, 0.25],
     [0.31, 0.32, 0.33, 0.34, 0.35],
     [0.41, 0.42, 0.43, 0.44, 0.45]],
    dtype=tf.float32)
input_batch = tf.constant([2, 3, 0])

ue_raw = tf.nn.embedding_lookup(embed_table, input_batch)
raw_embedding_sum = tf.reduce_sum(ue_raw, axis=1)
non_zero = tf.count_nonzero(raw_embedding_sum)
# raw_embedding_sum is rank 1, so its shape vector has a single entry that
# can be reshaped to a scalar.
batch_size = tf.reshape(tf.shape(raw_embedding_sum), [])
hit_count_ratio = tf.divide(tf.cast(non_zero, tf.float32),
                            tf.cast(batch_size, tf.float32))
#hit_count = tf.reduce_sum(b)
#tf.summary.histogram('hit_count', hit_count)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print('ue_raw=', ue_raw, sess.run(ue_raw))
    print('raw_embedding_sum=', raw_embedding_sum,
          sess.run(raw_embedding_sum))
    print('non_zero=', type(non_zero), sess.run(non_zero))
    print('batch_size=', type(batch_size), sess.run(batch_size))
    print('hit_count_ratio=', hit_count_ratio, sess.run(hit_count_ratio))

# Disabled code. It was held in a triple-quoted string that is never closed
# in this script (a syntax error), so it is kept as comments instead:
# ue_raw = fc_ue.get_tensor()
# tf.summary.histogram(ue_name + "_raw_tensor", ue_raw)
import tensorflow as tf

# Demo graph: e = b / sin(b * a)
a = tf.constant(21, dtype=tf.float32)
b = tf.constant(314, dtype=tf.float32)
c = tf.multiply(b, a)
d = tf.sin(c)
e = tf.divide(b, d)

# The context manager closes the session once the value has been fetched.
with tf.Session() as sess:
    outs = sess.run(e)

# print() with a single argument gives the same output on Python 2 and 3;
# the original print statement is a syntax error on Python 3.
print("outs = {}".format(outs))
def get_log_prob(self, policy_parameters, sy_ac_na):
    """Construct the op computing log-probabilities of the taken actions.

    arguments:
        policy_parameters
            if discrete: logits of a categorical distribution over actions
                sy_logits_na: (batch_size, self.ac_dim)
            if continuous: (mean, log_std) of a Gaussian distribution over
                actions
                sy_mean: (batch_size, self.ac_dim)
                sy_logstd: (self.ac_dim,)

        sy_ac_na:
            if discrete: (batch_size,)
            if continuous: (batch_size, self.ac_dim)

    returns:
        sy_logprob_n: (batch_size)

    Notes:
        * Discrete case: log of the softmax probability of the chosen
          action, computed with ``tf.nn.log_softmax`` so that very small
          probabilities do not underflow to ``log(0) = -inf``.
        * Continuous case: the log *density* of a Gaussian is used in place
          of a log-probability; only its gradient with respect to the policy
          parameters is needed downstream.
        * ``sy_logstd`` holds one value per action dimension, i.e. the
          action dimensions are treated as independent (diagonal
          covariance), so the per-dimension log-densities are summed.
    """
    if self.discrete:
        sy_logits_na = policy_parameters
        # [batch_size, action_space] log-probabilities.
        log_probs_na = tf.nn.log_softmax(sy_logits_na)
        # Build [[0, a_0], [1, a_1], ...] so that gather_nd picks, for each
        # row k, the entry of the chosen action a_k.
        batch_indices = tf.range(0, tf.shape(sy_ac_na)[0], 1)
        indexer = tf.stack([batch_indices, sy_ac_na], axis=1)
        sy_logprob_n = tf.gather_nd(log_probs_na, indexer)
    else:
        sy_mean, sy_logstd = policy_parameters
        # sy_logstd is the LOG of the standard deviation, so
        # sigma = exp(sy_logstd); squaring sy_logstd (as was done before)
        # does not give the variance.
        standardized = tf.divide(
            tf.subtract(sy_ac_na, sy_mean), tf.exp(sy_logstd))
        log_two_pi = tf.log(tf.constant(2 * m.pi, dtype=tf.float32))
        # log N(a; mu, sigma) = -0.5*((a-mu)/sigma)^2 - log(sigma)
        #                       - 0.5*log(2*pi),
        # evaluated directly in log space instead of log(pdf) to avoid
        # underflow.
        log_pdf = (-0.5 * tf.square(standardized)
                   - sy_logstd
                   - 0.5 * log_two_pi)
        # Sum over action dimensions: sum(log p_i) = log(prod p_i).
        sy_logprob_n = tf.reduce_sum(log_pdf, 1)
    return sy_logprob_n
def result(self):
    """Return ``self.true_positives`` divided by ``self.n``."""
    numerator = self.true_positives
    denominator = self.n
    return tf.divide(numerator, denominator)
def neg_loglikelihood(prob_dicts, w_edges):
    '''
    negative loglikelihood of the edges

    For each of the ``self.combination`` entries, the exponentiated scores
    in ``prob_dicts[i]`` and the bin logits in ``w_edges[i]`` (both capped
    at 10 before ``exp``) are combined into a weighted softmax-like ratio,
    and the sum of its logs is accumulated. The accumulated value is
    averaged over ``self.combination`` and negated.

    NOTE(review): ``self`` is not a parameter here; it is presumably taken
    from an enclosing method's scope -- confirm.

    :param prob_dicts: indexable of score tensors, one per combination
    :param w_edges: indexable of edge-weight logit tensors, one per
        combination
    :return: scalar tensor with the negative mean log-likelihood
    '''
    ll = 0
    with tf.variable_scope('NLL'):
        for i in range(self.combination):
            prob_dict = prob_dicts[i]
            w_edge = w_edges[i]

            prob_dict = tf.Print(prob_dict, [prob_dict],
                                 message="my prob dict values:")
            print("Debug prob dict shape", tf.shape(prob_dict))
            prob_dict_resized = tf.reshape(prob_dict, [-1])
            prob_dict_resized = tf.Print(
                prob_dict_resized, [prob_dict_resized],
                message="my prob dict resized values:")

            print("Debug w_edge_shape", tf.shape(w_edge), w_edge.get_shape(),
                  tf.stack([tf.shape(w_edge)[0]])[0])

            # One row of bin_dim logits per edge; the trailing
            # negative-sample rows are dropped.
            w_edge_resized = tf.reshape(w_edge, [-1, self.bin_dim])
            if self.neg_sample_size > 0:
                w_edge_resized = tf.reshape(
                    w_edge[:-self.bin_dim * self.neg_sample_size],
                    [-1, self.bin_dim])
            w_edge_size_r = tf.shape(w_edge_resized)[0]
            w_edge_size_r = tf.Print(w_edge_size_r, [w_edge_size_r],
                                     message="my size values r:")

            # Cap the logits at 10 before exponentiating.
            w_edge_exp = tf.exp(
                tf.minimum(
                    w_edge_resized,
                    tf.fill([w_edge_size_r, self.bin_dim], 10.0)))
            w_edge_pos = tf.reduce_sum(
                tf.multiply(self.weight_bin[i], w_edge_exp), axis=1)
            w_edge_total = tf.reduce_sum(w_edge_exp, axis=1)
            w_edge_score = tf.divide(w_edge_pos, w_edge_total)
            w_edge_score = tf.Print(w_edge_score, [w_edge_score],
                                    message="my w_edge_score values:")

            prob_dict_resized_shape = tf.shape(prob_dict_resized)[0]
            prob_dict_resized_shape = tf.Print(
                prob_dict_resized_shape, [prob_dict_resized_shape],
                message="my prob dict size values:")
            prob_dict_exp = tf.exp(
                tf.minimum(prob_dict_resized,
                           tf.fill([prob_dict_resized_shape], 10.0)))
            prob_dict_exp = tf.Print(prob_dict_exp, [prob_dict_exp],
                                     message="my decscore values:")

            # The last neg_sample_size entries are negative samples.
            pos_score = prob_dict_exp
            if self.neg_sample_size > 0:
                pos_score = prob_dict_exp[:-self.neg_sample_size]
            # `st` is needed below whatever neg_sample_size is, so it is
            # defined unconditionally.
            st = tf.shape(pos_score)[0]
            st = tf.Print(st, [st], message="my st values:")
            pos_score = tf.Print(pos_score, [pos_score],
                                 message="my posscore values:")
            #pos_weight_score = tf.multiply(tf.reshape(pos_score,[st, 1]), w_edge_score)
            pos_weight_score = tf.multiply(
                pos_score, tf.reshape(w_edge_score, [1, -1]))

            neg_score = tf.cumsum(prob_dict_exp, reverse=True)
            if self.neg_sample_size > 0:
                neg_score = tf.cumsum(prob_dict_exp[1:], reverse=True)
                # Drop the trailing neg_sample_size - 1 entries so the length
                # matches pos_score. With exactly one negative sample there
                # is nothing to drop; the former `[:-neg_sample_size + 1]`
                # evaluated to `[:0]` and produced an empty tensor.
                extra = self.neg_sample_size - 1
                if extra > 0:
                    neg_score = neg_score[:-extra]

            softmax_out = tf.divide(pos_weight_score, neg_score)
            # The small constant keeps the log finite.
            ll += tf.reduce_sum(
                tf.log(tf.add(softmax_out, tf.fill([1, st], 1e-9))))
            #ll = tf.reduce_sum(tf.log(tf.add(tf.multiply(self.adj, softmax_out), tf.fill([self.n,self.n], 1e-9))))

        ll = ll / self.combination
        ll = tf.Print(ll, [ll], message="My LL loss")
    return (-ll)
# Initialise state to 0
state = tf.Variable(0, dtype=tf.float64, name="counter")
print(state.name, state.value())

tensor = tf.constant([[1, 2, 3], [4, 5, 6]])
# Evaluate the constant in a session that is closed right afterwards
# (the inline tf.Session() used before was never closed).
with tf.Session() as sess:
    print(sess.run(tensor))

one = tf.constant(1, dtype=tf.float64)
two = tf.constant(2, dtype=tf.float64)

# Add one four times, then halve: new_value = (state + 4) / 2
new_value = tf.add(state, one)
new_value = tf.add(new_value, one)
new_value = tf.add(new_value, one)
new_value = tf.add(new_value, one)
new_value = tf.divide(new_value, two)

# Assign new_value back to state
update = tf.assign(state, new_value)

# Variables that have been defined must be initialised with this op before
# they are used.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    # Run the initialiser first
    sess.run(init)
    for _ in range(3):
        sess.run(update)
        print(sess.run(state))