def make_losses(self, pred_relevant, targets_preprocessed, objective_indices, objective_coeffs):
    # make a loss function and compute some summary numbers
    per_target_loss = my_ops.mse_ignore_nans(pred_relevant, targets_preprocessed, reduction_indices=0)
    loss = tf.reduce_sum(per_target_loss)

    # compute objective value, just for logging purposes
    # TODO add multiplication by the objective_coeffs (somehow not trivial)
    obj = tf.reduce_sum(self.postprocess_predictions(targets_preprocessed), 1)
    #obj = tf.sum(self.postprocess_predictions(targets_preprocessed[:,objective_indices]) * objective_coeffs[None,:], axis=1)
    obj_nonan = tf.where(tf.is_nan(obj), tf.zeros_like(obj), obj)
    num_valid_targets = tf.reduce_sum(1-tf.cast(tf.is_nan(obj), tf.float32))
    mean_obj = tf.reduce_sum(obj_nonan) / num_valid_targets

    # summaries
    obj_sum = tf.summary.scalar("objective_todo", mean_obj)
    #TODO
    per_target_loss_sums = []
    #per_target_loss_sums = [tf.summary.scalar(name, loss) for name,loss in zip(self.target_names,per_target_loss)]
    loss_sum = tf.summary.scalar("full loss", loss)

    #self.per_target_loss = tf.get_variable('avg_targets', [self.target_dim], initializer=tf.constant_initializer(value=0.))

    full_loss = loss
    errs_to_print = [loss]
    short_summary = [loss_sum]
    detailed_summary = per_target_loss_sums + [obj_sum]

    return full_loss, errs_to_print, short_summary, detailed_summary
def cut(self, hits, start, end):
    """
    Cuts the [start:end] range from the input data.
    :param hits: hits timeseries
    :param start: start index
    :param end: end index
    :return: tuple (train_hits, test_hits, dow, lagged_hits)
    """
    # Pad hits to ensure we have enough array length for prediction
    hits = tf.concat([hits, tf.fill([self.predict_window], np.NaN)], axis=0)
    cropped_hit = hits[start:end]

    # cut day of week
    cropped_dow = self.inp.dow[start:end]

    # Cut lagged hits
    # gather() accepts only int32 indexes
    cropped_lags = tf.cast(self.inp.lagged_ix[start:end], tf.int32)
    # Mask for -1 (no data) lag indexes
    lag_mask = cropped_lags < 0
    # Convert -1 to 0 for gather(); it doesn't accept anything exotic
    cropped_lags = tf.maximum(cropped_lags, 0)
    # Translate lag indexes to hit values
    lagged_hit = tf.gather(hits, cropped_lags)
    # Convert masked (see above) or NaN lagged hits to zeros
    lag_zeros = tf.zeros_like(lagged_hit)
    lagged_hit = tf.where(lag_mask | tf.is_nan(lagged_hit), lag_zeros, lagged_hit)

    # Split into train and test
    x_hits, y_hits = tf.split(cropped_hit, [self.train_window, self.predict_window], axis=0)
    # Convert NaN to zero in train data
    x_hits = tf.where(tf.is_nan(x_hits), tf.zeros_like(x_hits), x_hits)
    return x_hits, y_hits, cropped_dow, lagged_hit
def testUniformNans(self):
    a = 10.0
    b = [11.0, 100.0]
    uniform = uniform_lib.Uniform(low=a, high=b)

    no_nans = tf.constant(1.0)
    nans = tf.constant(0.0) / tf.constant(0.0)
    self.assertTrue(self.evaluate(tf.is_nan(nans)))
    with_nans = tf.stack([no_nans, nans])

    pdf = uniform.prob(with_nans)

    is_nan = self.evaluate(tf.is_nan(pdf))
    self.assertFalse(is_nan[0])
    self.assertTrue(is_nan[1])
def __call__(self,
             prediction_tensor,
             target_tensor,
             ignore_nan_targets=False,
             scope=None,
             **params):
    """Call the loss function.

    Args:
      prediction_tensor: an N-d tensor of shape [batch, anchors, ...]
        representing predicted quantities.
      target_tensor: an N-d tensor of shape [batch, anchors, ...] representing
        regression or classification targets.
      ignore_nan_targets: whether to ignore nan targets in the loss computation.
        E.g. can be used if the target tensor is missing groundtruth data that
        shouldn't be factored into the loss.
      scope: Op scope name. Defaults to 'Loss' if None.
      **params: Additional keyword arguments for specific implementations of
        the Loss.

    Returns:
      loss: a tensor representing the value of the loss function.
    """
    with tf.name_scope(scope, 'Loss',
                       [prediction_tensor, target_tensor, params]) as scope:
      if ignore_nan_targets:
        target_tensor = tf.where(tf.is_nan(target_tensor),
                                 prediction_tensor,
                                 target_tensor)
      return self._compute_loss(prediction_tensor, target_tensor, **params)
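A minimal standalone sketch (TF 1.x, not part of the Loss class above) of what the `ignore_nan_targets` substitution does: NaN targets are replaced by the predictions themselves, so those elements contribute zero error and zero gradient.

import numpy as np
import tensorflow as tf

preds = tf.constant([1.0, 2.0, 3.0])
targets = tf.constant([1.5, np.nan, 2.0])
# NaN targets become the predictions, so their squared error is exactly zero
safe_targets = tf.where(tf.is_nan(targets), preds, targets)   # [1.5, 2.0, 2.0]
loss = tf.reduce_sum(tf.square(safe_targets - preds))         # only valid entries count

with tf.Session() as sess:
    print(sess.run(loss))  # 1.25 = 0.5**2 + 0 + 1.0**2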
def testUniformNans(self):
    with self.test_session():
      a = 10.0
      b = [11.0, 100.0]
      uniform = tf.contrib.distributions.Uniform(a=a, b=b)

      no_nans = tf.constant(1.0)
      nans = tf.constant(0.0) / tf.constant(0.0)
      self.assertTrue(tf.is_nan(nans).eval())
      with_nans = tf.pack([no_nans, nans])

      pdf = uniform.pdf(with_nans)

      is_nan = tf.is_nan(pdf).eval()
      self.assertFalse(is_nan[0])
      self.assertTrue(is_nan[1])
def filter_groundtruth_with_nan_box_coordinates(tensor_dict):
  """Filters out groundtruth with no bounding boxes.

  Args:
    tensor_dict: a dictionary of following groundtruth tensors -
      fields.InputDataFields.groundtruth_boxes
      fields.InputDataFields.groundtruth_classes
      fields.InputDataFields.groundtruth_confidences
      fields.InputDataFields.groundtruth_keypoints
      fields.InputDataFields.groundtruth_instance_masks
      fields.InputDataFields.groundtruth_is_crowd
      fields.InputDataFields.groundtruth_area
      fields.InputDataFields.groundtruth_label_types

  Returns:
    a dictionary of tensors containing only the groundtruth that have bounding
    boxes.
  """
  groundtruth_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
  nan_indicator_vector = tf.greater(tf.reduce_sum(tf.to_int32(
      tf.is_nan(groundtruth_boxes)), reduction_indices=[1]), 0)
  valid_indicator_vector = tf.logical_not(nan_indicator_vector)
  valid_indices = tf.where(valid_indicator_vector)

  return retain_groundtruth(tensor_dict, valid_indices)
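A self-contained toy version of the masking logic above (assumed example tensors, not the object_detection API): any box row containing a NaN coordinate is dropped, clean rows are kept.

import numpy as np
import tensorflow as tf

boxes = tf.constant([[0.1, 0.1, 0.5, 0.5],
                     [np.nan, 0.2, 0.6, 0.6],
                     [0.3, 0.3, 0.9, 0.9]])
# a row is invalid if it contains at least one NaN coordinate
has_nan = tf.greater(tf.reduce_sum(tf.to_int32(tf.is_nan(boxes)), axis=1), 0)
valid_indices = tf.where(tf.logical_not(has_nan))   # indices of clean rows
valid_boxes = tf.gather_nd(boxes, valid_indices)

with tf.Session() as sess:
    print(sess.run(valid_boxes))  # rows 0 and 2 only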
def NLL(self, y, lengths, pis, mus, sigmas, rho, es, eps=1e-8):
    sigma_1, sigma_2 = tf.split(sigmas, 2, axis=2)
    y_1, y_2, y_3 = tf.split(y, 3, axis=2)
    mu_1, mu_2 = tf.split(mus, 2, axis=2)

    norm = 1.0 / (2*np.pi*sigma_1*sigma_2 * tf.sqrt(1 - tf.square(rho)))
    Z = tf.square((y_1 - mu_1) / (sigma_1)) + \
        tf.square((y_2 - mu_2) / (sigma_2)) - \
        2*rho*(y_1 - mu_1)*(y_2 - mu_2) / (sigma_1*sigma_2)

    exp = -1.0*Z / (2*(1 - tf.square(rho)))
    gaussian_likelihoods = tf.exp(exp) * norm
    gmm_likelihood = tf.reduce_sum(pis * gaussian_likelihoods, 2)
    gmm_likelihood = tf.clip_by_value(gmm_likelihood, eps, np.inf)

    bernoulli_likelihood = tf.squeeze(tf.where(tf.equal(tf.ones_like(y_3), y_3), es, 1 - es))

    nll = -(tf.log(gmm_likelihood) + tf.log(bernoulli_likelihood))

    sequence_mask = tf.logical_and(
        tf.sequence_mask(lengths, maxlen=tf.shape(y)[1]),
        tf.logical_not(tf.is_nan(nll)),
    )
    nll = tf.where(sequence_mask, nll, tf.zeros_like(nll))
    num_valid = tf.reduce_sum(tf.cast(sequence_mask, tf.float32), axis=1)

    sequence_loss = tf.reduce_sum(nll, axis=1) / tf.maximum(num_valid, 1.0)
    element_loss = tf.reduce_sum(nll) / tf.maximum(tf.reduce_sum(num_valid), 1.0)
    return sequence_loss, element_loss
def kl_divergence(distribution_a, distribution_b,
                  allow_nan_stats=True, name=None):
  """Get the KL-divergence KL(distribution_a || distribution_b).

  If there is no KL method registered specifically for `type(distribution_a)`
  and `type(distribution_b)`, then the class hierarchies of these types are
  searched.

  If one KL method is registered between any pairs of classes in these two
  parent hierarchies, it is used.

  If more than one such registered method exists, the method whose registered
  classes have the shortest sum MRO paths to the input types is used.

  If more than one such shortest path exists, the first method
  identified in the search is used (favoring a shorter MRO distance to
  `type(distribution_a)`).

  Args:
    distribution_a: The first distribution.
    distribution_b: The second distribution.
    allow_nan_stats: Python `bool`, default `True`. When `True`,
      statistics (e.g., mean, mode, variance) use the value "`NaN`" to
      indicate the result is undefined. When `False`, an exception is raised
      if one or more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Returns:
    A Tensor with the batchwise KL-divergence between `distribution_a`
    and `distribution_b`.

  Raises:
    NotImplementedError: If no KL method is defined for distribution types
      of `distribution_a` and `distribution_b`.
  """
  kl_fn = _registered_kl(type(distribution_a), type(distribution_b))
  if kl_fn is None:
    # TODO(b/117098119): For backwards compatibility, we check TF's registry as
    # well. This typically happens when this function is called on a pair of
    # TF's distributions.
    with deprecation.silence():
      return tf.distributions.kl_divergence(distribution_a, distribution_b)

  with tf.name_scope("KullbackLeibler"):
    kl_t = kl_fn(distribution_a, distribution_b, name=name)
    if allow_nan_stats:
      return kl_t

    # Check KL for NaNs
    kl_t = tf.identity(kl_t, name="kl")

    with tf.control_dependencies([
        tf.Assert(
            tf.logical_not(
                tf.reduce_any(tf.is_nan(kl_t))),
            ["KL calculation between %s and %s returned NaN values "
             "(and was called with allow_nan_stats=False). Values:"
             % (distribution_a.name, distribution_b.name), kl_t])
    ]):
      return tf.identity(kl_t, name="checked_kl")
def scale(self, x):
    """Scale x from -0.5 - 0.5 to 0 - 255."""
    x = tf.where(tf.is_nan(x), tf.ones_like(x), x)
    x = tf.where(tf.is_inf(x), tf.ones_like(x), x)
    x = tf.clip_by_value(x, -0.5, 0.5)
    x += 0.5
    x = x * 2**self.hparams.n_bits_x
    return tf.cast(tf.clip_by_value(x, 0, 255), dtype=tf.uint8)
def _build_mu_algorithm(self):
    """build dataflow graph for Multiplicative algorithm"""
    V, H, W = self.V, self.H, self.W
    rank = self.rank
    shape = V.get_shape()

    graph = tf.get_default_graph()

    # save W for calculating delta with the updated W
    W_old = tf.get_variable(name="W_old", shape=[shape[0], rank])
    save_W = W_old.assign(W)

    # Multiplicative updates
    with graph.control_dependencies([save_W]):
        # update operation for H
        Wt = tf.transpose(W)
        WV = tf.matmul(Wt, V)
        WWH = tf.matmul(tf.matmul(Wt, W), H)
        WV_WWH = WV / WWH
        # select op should be executed in CPU not in GPU
        with tf.device('/cpu:0'):
            # convert nan to zero
            WV_WWH = tf.select(tf.is_nan(WV_WWH),
                               tf.zeros_like(WV_WWH),
                               WV_WWH)
        H_new = H * WV_WWH
        update_H = H.assign(H_new)

    with graph.control_dependencies([save_W, update_H]):
        # update operation for W (after updating H)
        Ht = tf.transpose(H)
        VH = tf.matmul(V, Ht)
        WHH = tf.matmul(W, tf.matmul(H, Ht))
        VH_WHH = VH / WHH
        with tf.device('/cpu:0'):
            VH_WHH = tf.select(tf.is_nan(VH_WHH),
                               tf.zeros_like(VH_WHH),
                               VH_WHH)
        W_new = W * VH_WHH
        update_W = W.assign(W_new)

    self.delta = tf.reduce_sum(tf.abs(W_old - W))

    self.step = tf.group(save_W, update_H, update_W)
def replace_nan_groundtruth_label_scores_with_ones(label_scores):
  """Replaces nan label scores with 1.0.

  Args:
    label_scores: a tensor containing object annotation label scores.

  Returns:
    a tensor where NaN label scores have been replaced by ones.
  """
  return tf.where(
      tf.is_nan(label_scores), tf.ones(tf.shape(label_scores)), label_scores)
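A minimal usage sketch (TF 1.x), assuming a score vector where missing annotations are encoded as NaN; they get a default score of 1.0.

import numpy as np
import tensorflow as tf

scores = tf.constant([0.9, np.nan, 0.4])
fixed = tf.where(tf.is_nan(scores), tf.ones(tf.shape(scores)), scores)

with tf.Session() as sess:
    print(sess.run(fixed))  # [0.9, 1.0, 0.4]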
def set_zero_on_high_global_norm(self, grad, grad_norm_threshold, global_norm_tag=None):
    """
    :param tf.Tensor grad:
    :param float grad_norm_threshold:
    :param str|None global_norm_tag:
    :rtype: tf.Tensor
    """
    norm = self.get_global_grad_norm(tag=global_norm_tag)
    # Also check nan/inf. Treat them as if we would have been over grad_norm_threshold.
    zero_cond = tf.logical_or(tf.is_nan(norm), tf.is_inf(norm))
    zero_cond = tf.logical_or(zero_cond, tf.greater(norm, grad_norm_threshold))
    return tf.where(zero_cond, tf.zeros_like(grad), grad)
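The same guard written as a standalone sketch (the helper name and threshold are illustrative, not taken from the class above): a gradient is zeroed when its norm is NaN, Inf, or above a threshold. A scalar 0/1 multiplier is used here instead of tf.where, since the condition is a scalar while the gradient is not.

import tensorflow as tf

def zero_on_bad_norm(grad, threshold=5.0):
    norm = tf.global_norm([grad])
    bad = tf.logical_or(tf.is_nan(norm), tf.is_inf(norm))
    bad = tf.logical_or(bad, tf.greater(norm, threshold))
    # multiply by 0 when bad, by 1 otherwise
    return grad * tf.cast(tf.logical_not(bad), grad.dtype)

g = tf.constant([3.0, 4.0])     # norm 5.0  -> kept
h = tf.constant([30.0, 40.0])   # norm 50.0 -> zeroed

with tf.Session() as sess:
    print(sess.run([zero_on_bad_norm(g), zero_on_bad_norm(h)]))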
def _prob(self, x):
    broadcasted_x = x * tf.ones(
        self.batch_shape_tensor(), dtype=x.dtype)
    return tf.where(
        tf.is_nan(broadcasted_x),
        broadcasted_x,
        tf.where(
            tf.logical_or(broadcasted_x < self.low,
                          # This > is only sound for continuous uniform
                          broadcasted_x > self.high),
            tf.zeros_like(broadcasted_x),
            tf.ones_like(broadcasted_x) / self.range()))
def _compare(self, x, use_gpu):
    np_finite, np_inf, np_nan = np.isfinite(x), np.isinf(x), np.isnan(x)
    with self.test_session(use_gpu=use_gpu) as sess:
      inx = tf.convert_to_tensor(x)
      ofinite, oinf, onan = tf.is_finite(inx), tf.is_inf(inx), tf.is_nan(inx)
      tf_finite, tf_inf, tf_nan = sess.run([ofinite, oinf, onan])
    self.assertAllEqual(np_inf, tf_inf)
    self.assertAllEqual(np_nan, tf_nan)
    self.assertAllEqual(np_finite, tf_finite)
    self.assertShapeEqual(np_inf, oinf)
    self.assertShapeEqual(np_nan, onan)
    self.assertShapeEqual(np_finite, ofinite)
def _get_cubic_root(self):
    """Get the cubic root."""
    # We have the equation x^2 D^2 + (1-x)^4 * C / h_min^2
    # where x = sqrt(mu).
    # We substitute x, which is sqrt(mu), with x = y + 1.
    # It gives y^3 + py = q
    # where p = (D^2 h_min^2)/(2*C) and q = -p.
    # We use the Vieta's substitution to compute the root.
    # There is only one real solution y (which is in [0, 1] ).
    # http://mathworld.wolfram.com/VietasSubstitution.html
    assert_array = [
        tf.Assert(
            tf.logical_not(tf.is_nan(self._dist_to_opt_avg)),
            [self._dist_to_opt_avg,]),
        tf.Assert(
            tf.logical_not(tf.is_nan(self._h_min)),
            [self._h_min,]),
        tf.Assert(
            tf.logical_not(tf.is_nan(self._grad_var)),
            [self._grad_var,]),
        tf.Assert(
            tf.logical_not(tf.is_inf(self._dist_to_opt_avg)),
            [self._dist_to_opt_avg,]),
        tf.Assert(
            tf.logical_not(tf.is_inf(self._h_min)),
            [self._h_min,]),
        tf.Assert(
            tf.logical_not(tf.is_inf(self._grad_var)),
            [self._grad_var,])
    ]
    with tf.control_dependencies(assert_array):
      p = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var
      w3 = (-tf.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
      w = tf.sign(w3) * tf.pow(tf.abs(w3), 1.0/3.0)
      y = w - p / 3.0 / w
      x = y + 1
    return x
def __call__(self,
             prediction_tensor,
             target_tensor,
             ignore_nan_targets=False,
             losses_mask=None,
             scope=None,
             **params):
    """Call the loss function.

    Args:
      prediction_tensor: an N-d tensor of shape [batch, anchors, ...]
        representing predicted quantities.
      target_tensor: an N-d tensor of shape [batch, anchors, ...] representing
        regression or classification targets.
      ignore_nan_targets: whether to ignore nan targets in the loss computation.
        E.g. can be used if the target tensor is missing groundtruth data that
        shouldn't be factored into the loss.
      losses_mask: A [batch] boolean tensor that indicates whether losses should
        be applied to individual images in the batch. For elements that
        are True, corresponding prediction, target, and weight tensors will be
        removed prior to loss computation. If None, no filtering will take place
        prior to loss computation.
      scope: Op scope name. Defaults to 'Loss' if None.
      **params: Additional keyword arguments for specific implementations of
        the Loss.

    Returns:
      loss: a tensor representing the value of the loss function.
    """
    with tf.name_scope(scope, 'Loss',
                       [prediction_tensor, target_tensor, params]) as scope:
      if ignore_nan_targets:
        target_tensor = tf.where(tf.is_nan(target_tensor),
                                 prediction_tensor,
                                 target_tensor)
      if losses_mask is not None:
        tensor_multiplier = self._get_loss_multiplier_for_tensor(
            prediction_tensor,
            losses_mask)
        prediction_tensor *= tensor_multiplier
        target_tensor *= tensor_multiplier

        if 'weights' in params:
          params['weights'] = tf.convert_to_tensor(params['weights'])
          weights_multiplier = self._get_loss_multiplier_for_tensor(
              params['weights'],
              losses_mask)
          params['weights'] *= weights_multiplier
      return self._compute_loss(prediction_tensor, target_tensor, **params)
def __init__(self, batch_size, vocab_size, encoding_size, embedding_size,
             num_glimpses=8, grad_norm_clip=5., l2_reg_coef=1e-4,
             session=tf.Session(), name='AlternatingAttention'):
    """
    Creates an iterative alternating attention network as described in
    https://arxiv.org/abs/1606.02245
    """
    self._batch_size = batch_size
    self._vocab_size = vocab_size
    self._encode_size = encoding_size
    self._infer_size = 4 * encoding_size
    self._embedding_size = embedding_size
    self._num_glimpses = num_glimpses
    self._sess = session
    self._name = name

    self._build_placeholders()
    self._build_variables()

    # Regularization
    tf.contrib.layers.apply_regularization(
        tf.contrib.layers.l2_regularizer(l2_reg_coef), [self._embeddings])

    # Answer probability
    doc_attentions = self._inference(self._docs, self._queries)
    nans = tf.reduce_sum(tf.to_float(tf.is_nan(doc_attentions)))

    self._doc_attentions = doc_attentions
    ans_mask = tf.to_float(tf.equal(tf.expand_dims(self._answers, -1), self._docs))
    P_a = tf.reduce_sum(ans_mask * doc_attentions, 1)
    loss_op = -tf.reduce_mean(tf.log(P_a + tf.constant(0.00001)))
    self._loss_op = loss_op

    # Optimizer and gradients
    with tf.name_scope("optimizer"):
        self._opt = tf.train.AdamOptimizer(learning_rate=self._learning_rate)
        grads_and_vars = self._opt.compute_gradients(loss_op)
        capped_grads_and_vars = [(tf.clip_by_norm(g, grad_norm_clip), v)
                                 for g, v in grads_and_vars]
        self._train_op = self._opt.apply_gradients(capped_grads_and_vars,
                                                   global_step=self._global_step)

    tf.summary.scalar('loss', self._loss_op)
    tf.summary.scalar('learning_rate', self._learning_rate)
    tf.summary.histogram('answer_probability', P_a)
    self._summary_op = tf.summary.merge_all()

    self._sess.run(tf.global_variables_initializer())
def check_grads(grads_and_vars):
    has_nan_ops = []
    amax_ops = []
    for grad, _ in grads_and_vars:
        if grad is not None:
            if isinstance(grad, tf.IndexedSlices):
                x = grad.values
            else:
                x = grad
            has_nan_ops.append(tf.reduce_any(tf.is_nan(x)))
            amax_ops.append(tf.reduce_max(tf.abs(x)))
    has_nan = tf.reduce_any(has_nan_ops)
    amax = tf.reduce_max(amax_ops)
    return has_nan, amax
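A minimal usage sketch (TF 1.x) of check_grads with a toy loss; the returned flags can be inspected before deciding whether to apply the update.

import tensorflow as tf

w = tf.Variable([1.0, -2.0])
loss = tf.reduce_sum(tf.square(w))
opt = tf.train.GradientDescentOptimizer(0.1)
grads_and_vars = opt.compute_gradients(loss)

has_nan, amax = check_grads(grads_and_vars)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([has_nan, amax]))  # [False, 4.0] -- gradient is [2., -4.]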
def def_preprocessing_fn(inputs):
  """tf.transform's callback function for preprocessing inputs.

  Args:
    inputs: map from feature keys to raw not-yet-transformed features.

  Returns:
    Map from string feature key to transformed feature operations.
  """
  outputs = {}
  for key in taxi.DENSE_FLOAT_FEATURE_KEYS:
    # Preserve this feature as a dense float, setting nan's to the mean.
    outputs[taxi.transformed_name(key)] = transform.scale_to_z_score(
        _fill_in_missing(inputs[key]))

  for key in taxi.VOCAB_FEATURE_KEYS:
    # Build a vocabulary for this feature.
    outputs[
        taxi.transformed_name(key)] = transform.compute_and_apply_vocabulary(
            _fill_in_missing(inputs[key]),
            top_k=taxi.VOCAB_SIZE,
            num_oov_buckets=taxi.OOV_SIZE)

  for key in taxi.BUCKET_FEATURE_KEYS:
    outputs[taxi.transformed_name(key)] = transform.bucketize(
        _fill_in_missing(inputs[key]), taxi.FEATURE_BUCKET_COUNT)

  for key in taxi.CATEGORICAL_FEATURE_KEYS:
    outputs[taxi.transformed_name(key)] = _fill_in_missing(inputs[key])

  # Was this passenger a big tipper?
  taxi_fare = _fill_in_missing(inputs[taxi.FARE_KEY])
  tips = _fill_in_missing(inputs[taxi.LABEL_KEY])
  outputs[taxi.transformed_name(taxi.LABEL_KEY)] = tf.where(
      tf.is_nan(taxi_fare),
      tf.cast(tf.zeros_like(taxi_fare), tf.int64),
      # Test if the tip was > 20% of the fare.
      tf.cast(
          tf.greater(tips, tf.multiply(taxi_fare, tf.constant(0.2))), tf.int64))

  return outputs
def _arccosine(self, slist1, slist2, tf_embs):
    """
    Uses an arccosine kernel of degree 0 to calculate
    the similarity matrix between two vectors of embeddings.
    This is just cosine similarity projected into the [0,1] interval.
    """
    dot = self._dot(slist1, slist2, tf_embs)
    # This calculation corresponds to an arc-cosine with
    # degree 0. It can be interpreted as cosine
    # similarity but projected into a [0,1] interval.
    # TODO: arc-cosine with degree 1.
    tf_pi = tf.constant(np.pi, dtype=tf.float64)
    tf_norms = tf.constant(self.norms, dtype=tf.float64, name='norms')
    normlist1 = tf.gather(tf_norms, slist1, name='normlist1')
    normlist2 = tf.matrix_transpose(tf.gather(tf_norms, slist2, name='normlist2'))
    norms = tf.batch_matmul(normlist1, normlist2)
    cosine = tf.clip_by_value(tf.truediv(dot, norms), -1, 1)
    angle = tf.acos(cosine)
    angle = tf.select(tf.is_nan(angle), tf.ones_like(angle) * tf_pi, angle)
    return 1 - (angle / tf_pi)
def mse(outputs, targets):
    """
    Compute Mean Squared Error between given outputs and targets.

    If any values in ``targets`` are ``nan``, that will be treated as
    zero error for those elements.

    Parameters
    ----------
    outputs : ``tf.Tensor``
        Output values from a Probe in a network.
    targets : ``tf.Tensor``
        Target values for a Probe in a network.

    Returns
    -------
    mse : ``tf.Tensor``
        Tensor representing the mean squared error.
    """
    targets = tf.where(tf.is_nan(targets), outputs, targets)
    return tf.reduce_mean(tf.square(targets - outputs))
def save_model(sess, net, is_training, keep_prob):
    input_placeholder = tf.placeholder(tf.uint8, name='input_placeholder',
                                       shape=[None, SP1_BOX[0], SP1_BOX[1], SP1_BOX[2]])
    input_32 = tf.cast(input_placeholder, tf.float32)
    mean, var = tf.nn.moments(input_32, [1], keep_dims=True)  # single image normalization
    test_batch = tf.div(tf.subtract(input_32, mean), tf.sqrt(var))
    test_batch = tf.where(tf.is_nan(test_batch), tf.zeros_like(test_batch), test_batch)
    test_batch = tf.nn.avg_pool(test_batch,
                                ksize=[1, SP1_BOX[0]/SP2_BOX[0], SP1_BOX[1]/SP2_BOX[1], 1],
                                strides=[1, SP1_BOX[0]/SP2_BOX[0], SP1_BOX[1]/SP2_BOX[1], 1],
                                padding='SAME')
    if args.scheme == 'GBTC':
        test_batch = test_batch * 2  # trained on 4chan

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)

    if FLAGS.relu_input == 'relu':
        test_batch = tf.nn.relu(test_batch)
    elif FLAGS.relu_input == 'lrelu':
        test_batch = lrelu(test_batch, alpha=0.2)

    logits = net.inference(test_batch)
    predictions = tf.nn.softmax(logits, name='output')

    init = tf.global_variables_initializer()
    sess.run(init)
    #import IPython; IPython.embed()
    saver = tf.train.Saver(tf.global_variables())

    latest = tf.train.latest_checkpoint(FLAGS.train_dir)
    if not latest:
        print("No checkpoint to continue from in", FLAGS.train_dir)
        sys.exit(1)
    print("resume", latest)
    saver.restore(sess, latest)

    checkpoint_path = os.path.join(FLAGS.train_dir, 'model_with_preprocessing.ckpt')
    saver.save(sess, checkpoint_path, global_step=global_step)
    return
def _arccosine(self, s1, s2, tf_embs):
    """
    Uses an arccosine kernel of degree 0 to calculate
    the similarity matrix between two vectors of embeddings.
    This is just cosine similarity projected into the [0,1] interval.
    """
    tf_pi = tf.constant(np.pi, dtype=tf.float64)
    mat1 = tf.gather(tf_embs, s1)
    mat2 = tf.gather(tf_embs, s2)
    tf_norms = tf.constant(self.norms, dtype=tf.float64, name='norms')
    norms1 = tf.gather(tf_norms, s1)
    norms2 = tf.gather(tf_norms, s2)
    dot = tf.matmul(mat1, tf.transpose(mat2))
    norms = tf.matmul(norms1, tf.transpose(norms2))
    # We clip values due to numerical errors
    # which put some values outside the arccosine range.
    cosine = tf.clip_by_value(dot / norms, -1, 1)
    angle = tf.acos(cosine)
    # The 0 vector has norm 0, which generates a NaN.
    # We catch these NaNs and replace them with pi,
    # which ends up returning 0 similarity.
    angle = tf.select(tf.is_nan(angle), tf.ones_like(angle) * tf_pi, angle)
    return 1 - (angle / tf_pi)
def create_model(inputs, targets, target_masks, iter_num): """Create the full model for training/testing """ out_channels = int(targets.get_shape()[-1]) assert out_channels == int(inputs.get_shape()[-1]) outputs, iters, img_shape, max_iter = IterGAN(inputs, out_channels, iter_num, name='') rest = {} if a.sample_lambda > 0.0: with tf.name_scope('sample_steps'): i = tf.random_uniform((), maxval=max_iter, dtype=tf.int32) j = tf.random_uniform((), maxval=2, dtype=tf.int32) real_imgs = tf.stack([inputs, targets], name='real_imgs') d = IMG_SHAPE[1] with tf.name_scope('sample_fake'): sample_fake = iters[:, :, (i + 1) * d:(i + 2) * d, :] sample_fake.set_shape(img_shape) sample_real = real_imgs[j] with tf.variable_scope('disciminator_sample'): predict_sample_real = create_discriminator(sample_real) with tf.variable_scope('disciminator_sample', reuse=True): predict_sample_fake = create_discriminator(sample_fake) rest['sample'] = { 'i': i, 'j': j, 'predict_real': predict_sample_real, 'predict_fake': predict_sample_fake, 'real_inp': sample_real, 'fake_inp': sample_fake } # create two copies of discriminator, one for real and one for fake pairs # they share the same underlying variables with tf.name_scope('real_discriminator'): with tf.variable_scope('discriminator'): # 2x [batch, height, width, channels] => [batch, 30, 30, 1] predict_real = create_discriminator(inputs, targets) with tf.name_scope('fake_discriminator'): with tf.variable_scope('discriminator', reuse=True): # 2x [batch, height, width, channels] => [batch, 30, 30, 1] predict_fake = create_discriminator(inputs, outputs) with tf.name_scope('discriminator_loss'): # minimizing -tf.log will try to get inputs to 1 # predict_real => 1 # predict_fake => 0 discrim_loss = tf.reduce_mean(-(tf.log(predict_real + EPS) + tf.log(1 - predict_fake + EPS))) if a.sample_lambda > 0.0: discim_loss = discrim_loss + a.sample_lambda * \ tf.reduce_mean(-(tf.log(predict_sample_real + EPS) + tf.log(1 - predict_sample_fake + EPS))) with tf.name_scope('generator_loss'): # predict_fake => 1 # abs(targets - outputs) => 0 gen_loss_GAN = tf.reduce_mean(-tf.log(predict_fake + EPS)) if a.mmad_loss: with tf.name_scope('MMAD'): dif = tf.abs(targets - outputs, name='absdist') temp = tf.reduce_mean(dif) foreground_L1 = tf.reduce_mean(tf.boolean_mask( dif, target_masks), name='foreground') neg_target_masks = tf.logical_not(target_masks, name='neg') background_L1 = tf.reduce_mean(tf.boolean_mask( dif, neg_target_masks), name='background') gen_loss_L1 = 2 * foreground_L1 / 3 + background_L1 / 3 gen_loss_L1 = tf.where(tf.is_nan(gen_loss_L1), temp, gen_loss_L1) else: gen_loss_L1 = tf.reduce_mean(tf.abs(targets - outputs)) gen_loss = gen_loss_GAN * a.gan_weight + gen_loss_L1 * a.l1_weight if a.sample_lambda > 0.0: gen_loss = gen_loss + a.sample_lambda * \ tf.reduce_mean(-tf.log(predict_sample_fake + EPS)) global_step = tf.contrib.framework.get_or_create_global_step() incr_global_step = tf.assign(global_step, global_step + 1) if a.mode in {'train'}: with tf.name_scope('discriminator_train'): discrim_tvars = [ var for var in tf.trainable_variables() if var.name.startswith('discriminator') ] discrim_optim = tf.train.AdamOptimizer(a.lr, a.beta1) discrim_grads_and_vars = discrim_optim.compute_gradients( discrim_loss, var_list=discrim_tvars) discrim_train = discrim_optim.apply_gradients( discrim_grads_and_vars) with tf.name_scope('generator_train'): with tf.control_dependencies([discrim_train]): gen_tvars = [ var for var in tf.trainable_variables() if var.name.startswith('generator') ] 
gen_optim = tf.train.AdamOptimizer(a.lr, a.beta1) gen_grads_and_vars = gen_optim.compute_gradients( gen_loss, var_list=gen_tvars) gen_train = gen_optim.apply_gradients(gen_grads_and_vars) ema = tf.train.ExponentialMovingAverage(decay=0.99) update_losses = ema.apply([discrim_loss, gen_loss_GAN, gen_loss_L1]) return Model(predict_real=predict_real, predict_fake=predict_fake, discrim_loss=ema.average(discrim_loss), discrim_grads_and_vars=discrim_grads_and_vars, gen_loss_GAN=ema.average(gen_loss_GAN), gen_loss_L1=ema.average(gen_loss_L1), gen_grads_and_vars=gen_grads_and_vars, outputs=outputs, iters=tf.concat(iters, axis=2, name='between_steps'), train=tf.group(update_losses, incr_global_step, gen_train), rest=rest) else: return Model(predict_real=predict_real, predict_fake=predict_fake, discrim_loss=discrim_loss, discrim_grads_and_vars=tf.constant(0), gen_loss_GAN=gen_loss_GAN, gen_loss_L1=gen_loss_L1, gen_grads_and_vars=tf.constant(0), outputs=outputs, iters=tf.concat(iters, axis=2, name='between_steps'), train=tf.constant(0), rest=rest)
def main(): args = get_arguments() if args.dataset == 'ade20k': param = ADE20k_param elif args.dataset == 'cityscapes': param = cityscapes_param else: param = surreal_param # Set placeholder image_filename = tf.placeholder(dtype=tf.string) anno_filename = tf.placeholder(dtype=tf.string) # Read & Decode image img = tf.image.decode_image(tf.read_file(image_filename), channels=3) anno = tf.image.decode_image(tf.read_file(anno_filename), channels=1) img.set_shape([None, None, 3]) anno.set_shape([None, None, 1]) ori_shape = tf.shape(img) img = preprocess(img, param) model = model_config[args.model] net = model({'data': img}, num_classes=param['num_classes'], filter_scale=args.filter_scale, evaluation=True) # Predictions. raw_output = net.layers['conv6_cls'] raw_output_up = tf.image.resize_bilinear(raw_output, size=ori_shape[:2], align_corners=True) raw_output_up = tf.argmax(raw_output_up, axis=3) raw_pred = tf.expand_dims(raw_output_up, dim=3) # mIoU pred_flatten = tf.reshape(raw_pred, [-1,]) raw_gt = tf.reshape(anno, [-1,]) mask = tf.not_equal(raw_gt, param['ignore_label']) indices = tf.squeeze(tf.where(mask), 1) gt = tf.cast(tf.gather(raw_gt, indices), tf.int32) pred = tf.gather(pred_flatten, indices) #I do not know which one I have to choose if args.dataset == 'ade20k': pred = tf.add(pred, tf.constant(1, dtype=tf.int64)) mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=param['num_classes']+1) elif args.dataset == 'cityscapes': mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=param['num_classes']) elif args.dataset == 'surreal': less_equal_class = tf.less_equal(raw_gt, param['num_classes']-1) not_equal_ignore = tf.not_equal(raw_gt, param['ignore_label']) mask = tf.logical_and(less_equal_class, not_equal_ignore) indices = tf.squeeze(tf.where(mask), 1) gt = tf.cast(tf.gather(raw_gt, indices), tf.int64) pred = tf.cast(tf.gather(pred_flatten, indices),tf.int64) gt_n = tf.reshape(gt, [-1]) pred_n = tf.reshape(pred, [-1]) with tf.name_scope('metrics'): mIoU, update_op = tf.metrics.mean_iou(gt_n,pred_n, num_classes=param['num_classes']) accu, update_acc = tf.metrics.accuracy(gt_n,pred_n) reca, update_rec = tf.metrics.recall(gt_n,pred_n) prec, update_pre = tf.metrics.precision(gt_n,pred_n) mean, update_mean = tf.metrics.mean_per_class_accuracy(gt_n,pred_n, num_classes =param['num_classes']) conf_matrix = tf.confusion_matrix(gt_n,pred_n, num_classes=param['num_classes']) acc_per_class = tf.diag_part(conf_matrix)/tf.reduce_sum(conf_matrix,1) acc_per_class_good = tf.where(tf.is_nan(acc_per_class), tf.zeros_like(acc_per_class), acc_per_class) running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="metrics") # Set up tf session and initialize variables. 
config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() running_vars_initializer = tf.variables_initializer(var_list=running_vars) sess.run(init) sess.run(running_vars_initializer) listy = [] #tiene pinta que lo que tenia que estar en snapshots en train ahora va a model model_path = model_paths[args.model] if args.model == 'others': ckpt = tf.train.get_checkpoint_state(model_path) if ckpt and ckpt.model_checkpoint_path: loader = tf.train.Saver(var_list=tf.global_variables()) load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1]) load(loader, sess, ckpt.model_checkpoint_path) else: print('No checkpoint file found.') else: #net.load(model_path, sess) print('Restore from {}'.format(model_path)) img_files, anno_files = read_labeled_image_list(param['data_dir'], param['data_list']) for i in trange(param['num_steps'], desc='evaluation', leave=True): feed_dict = {image_filename: img_files[i], anno_filename: anno_files[i]} _ = sess.run([update_op,update_acc,update_rec,update_pre, update_mean], feed_dict=feed_dict) m,a,r,p,ma,apc = sess.run([mIoU,accu,reca,prec, mean,acc_per_class_good], feed_dict=feed_dict) f = 2*p*r/(p+r) metris = np.array([m,a,f,r,p]) metris = np.append(metris, ma) metris = np.append(metris, apc) listy.append(metris) if i > 0 and args.measure_time: calculate_time(sess, net, raw_pred, feed_dict) ll = np.mean(np.array(listy), axis = 0) np.save("./loss_data/loss_metrics.npy",ll) print('MIOU: {}'.format(m))
def lrp_one_timestep(self, r_incoming, t): """lrp applied to TeLL LSTMLayer for 1 timestep Parameters ------- r_incoming : tensor (batchsize, 1, units) relevance coming in (flowing in from upper layer/future timestep) t : tensor int tensor with current timestep (as to be used to index o, c, i, z) """ zero = self.__zero__ zero_init = self.__zero_init__ alpha, beta = self._alpha_, self._beta_ mul_rule = self._mul_rule_ act_h = self._act_h_ w_o, w_i = self._w_o_, self._w_i_ o_min, i_min, c_min, z_min = self._o_min_, self._i_min_, self._c_min_, self._z_min_ o_max, i_max, c_max, z_max = self._o_max_, self._i_max_, self._c_max_, self._z_max_ o, c, i, z = self._o_, self._c_, self._i_, self._z_ lrp_keys = self._lrp_keys_ lrp_dict = self._lrp_dict_ r_z, r_from_o, r_from_i, r_cc, r_y, r_cy, r_o, r_c, r_zi, r_i = [lrp_dict[k] for k in lrp_keys] # # for time t # if mul_rule is None: r_y = tf.concat([r_y, tf.expand_dims(r_incoming[:, -1, :], axis=1)], axis=1) r_cy = tf.concat([r_cy, tf.expand_dims(r_y[:, -1, :], axis=1)], axis=1) r_o = tf.concat([r_o, zero_init], axis=1) r_c = tf.concat([r_c, tf.expand_dims(r_cy[:, -1, :] + r_cc[:, -1, :], axis=1)], axis=1) r_zi_new = tf.expand_dims(r_c[:, -1, :] * (i[:, t, :] * z[:, t, :] / c[:, t, :]), axis=1) r_zi = tf.concat([r_zi, tf.where(tf.is_nan(r_zi_new), zero_init, r_zi_new)], axis=1) r_z = tf.concat([r_z, tf.expand_dims(r_zi[:, -1, :], axis=1)], axis=1) r_i = tf.concat([r_i, zero_init], axis=1) else: r_y = tf.concat([r_y, tf.expand_dims(r_incoming[:, -1, :] + r_from_o[:, -1, :] + r_from_i[:, -1, :], axis=1)], axis=1) r_cy = tf.concat([r_cy, tf.expand_dims(mul_rule(act_h(c[:, t, :]), o[:, t, :], r_y[:, -1, :], c_min, c_max, o_min, o_max), axis=1)], axis=1) r_o = tf.concat([r_o, tf.expand_dims(mul_rule(o[:, t, :], act_h(c[:, t, :]), r_y[:, -1, :], o_min, o_max, c_min, c_max), axis=1)], axis=1) r_c = tf.concat([r_c, tf.expand_dims(r_cy[:, -1, :] + r_cc[:, -1, :], axis=1)], axis=1) r_zi_new = tf.expand_dims(r_c[:, -1, :] * (i[:, t, :] * z[:, t, :] / c[:, t, :]), axis=1) r_zi = tf.concat([r_zi, tf.where(tf.is_nan(r_zi_new), zero_init, r_zi_new)], axis=1) r_z = tf.concat([r_z, tf.expand_dims(mul_rule(z[:, t, :], i[:, t, :], r_zi[:, -1, :], z_min, z_max, i_min, i_max), axis=1)], axis=1) r_i = tf.concat([r_i, tf.expand_dims(mul_rule(i[:, t, :], z[:, t, :], r_zi[:, -1, :], i_min, i_max, z_min, z_max), axis=1)], axis=1) # # distribute R to units through recurrent connections # t_greater_0 = tf.greater(t, zero) r_from_o_t = lrp(r=r_o[:, -1, :], w=w_o, x=o[:, t - 1, :], x_min=o_min, alpha=alpha, beta=beta) r_from_o = tf.cond(t_greater_0, lambda: tf.concat([r_from_o, tf.expand_dims(r_from_o_t, axis=1)], axis=1), lambda: r_from_o) r_from_i_t = lrp(r=r_i[:, -1, :], w=w_i, x=i[:, t - 1, :], x_min=i_min, alpha=alpha, beta=beta) r_from_i = tf.cond(t_greater_0, lambda: tf.concat([r_from_i, tf.expand_dims(r_from_i_t, axis=1)], axis=1), lambda: r_from_i) # # for time t-1 # r_cc_new = tf.expand_dims(c[:, t - 1, :] / c[:, t, :] * r_c[:, -1, :], axis=1) r_cc = tf.cond(t_greater_0, lambda: tf.concat([r_cc, tf.where(tf.is_nan(r_cc_new), zero_init, r_cc_new)], axis=1), lambda: r_cc) self._lrp_dict_ = OrderedDict(((k, v) for k, v in zip(lrp_keys, [r_z, r_from_o, r_from_i, r_cc, r_y, r_cy, r_o, r_c, r_zi, r_i])))
def proportional_multiplication_rule(x, y, z, x_min, x_max, y_min, y_max):
    r = z * ((x-x_min) / (x_max - x_min)) / (((x-x_min) / (x_max - x_min))
                                             + ((y-y_min) / (y_max - y_min)))
    return tf.where(tf.is_nan(r), tf.zeros_like(r), r)
def mse_ignore_nans(preds, targets, **kwargs):
    # Computes mse, ignores targets which are NaNs
    # replace nans in the target with corresponding preds, so that there is no gradient for those
    targets_nonan = tf.where(tf.is_nan(targets), preds, targets)
    return tf.reduce_mean(tf.square(targets_nonan - preds), **kwargs)
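A minimal usage sketch (TF 1.x): with reduction_indices=0, as in make_losses above, the error is averaged over the batch dimension, giving one value per target column; NaN targets contribute zero error for their element.

import numpy as np
import tensorflow as tf

preds = tf.constant([[1.0, 2.0],
                     [3.0, 4.0]])
targets = tf.constant([[0.0, np.nan],
                       [3.0, 6.0]])
per_target = mse_ignore_nans(preds, targets, reduction_indices=0)

with tf.Session() as sess:
    print(sess.run(per_target))  # [0.5, 2.0]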
def __init__( self, features, params={ 'prior_mean_hidden_layer': -1e-5, 'prior_stddev_hidden_layer': 1e-6, 'prior_stddev_outer_layer': 1e-8 }): self.features = features # Inputs to the tensorflow graph. X will be our phi(S, A), Y will be our reward self.X = tf.placeholder(tf.float32, [None, features]) self.Y = tf.placeholder(tf.float32, [None, 1]) self.hidden_layer_mean = params['prior_mean_hidden_layer'] self.hidden_layer_stddev = params['prior_stddev_hidden_layer'] self.outer_layer_sttdev = params['prior_stddev_outer_layer'] # Should be expandable to a deep network by adding more layers # Can add dense flipout layers for fully bayesian or could add simple dense or convolutional layers # to project into a smaller feature space before doing full distributions (would be more computationally efficient) self.layers = tf.keras.Sequential([ tfp.layers.DenseFlipout( # one output for estimating the reward 1, # the _prior_ distribution over our weights (even though it says posterior, it is the prior in the bayes rule sense) # this creates a vector of learnable independent normal distributions kernel_posterior_fn=tfp_layers_util. default_mean_field_normal_fn( # initialize the mean of the normal distributions randomly so that the means are slightly negative (pessimistic init) loc_initializer=tf.random_normal_initializer( mean=self.hidden_layer_mean, stddev=self.hidden_layer_stddev ) # prior mean and stddev of nodes in hidden layer ), # regularize our weights by pulling them towards a N(0, 1e-8) distribution # cannot have a N(0, 0) distribution, so pull them towards something with no variance kernel_prior_fn=KernelPrior(self.outer_layer_sttdev). output, # prior stddev over y's (outputs, in our case th rewards) # Don't use a bias weight here bias_posterior_fn= None, # set to None to keep everything local (local variance over all features) ) ]) # make predictions by sampling weights from the posterior and multiplying phi(S, A) self.predictions = self.layers(self.X) # model the variance of the noise on Y with a learnable normal distribution std = VariationalParameter('noise_std', [1]) # build the distribution over Y ~ N(W*phi(S, A), std) pred_dist = tfd.Normal(loc=self.predictions, scale=std.sample()) # Build the loss function # get the log probability of observing this value of Y given our parameters: P(Y | theta) log_prob = pred_dist.log_prob(self.Y) # make sure this log probability isn't nan (bug in tensorflow when variance approaches 0. if it is nan, just set it to zero) non_nan = tf.where(tf.is_nan(log_prob), tf.zeros_like(log_prob), log_prob) # get the mean over the outputs (only 1 output for now so this isn't really necessary, but it is good to be generic) neg_log_prob = -tf.reduce_mean(non_nan) # The KL-divergence is what trains the variance over the weights, the neg_log_prob is the loss over the mean # The KL-divergence is added as a "regularizer" to the layers as a hack to make this work with the tensorflow infrastructure (that's how tfp works) kl_div = sum(self.layers.losses) # the ELBO loss is just the sum of the loss over the variance (kl-div) and the loss over the mean (neg_log_prob) elbo_loss = neg_log_prob + kl_div # minimize the loss using some optimizer (adam with small learning rate seems to work well) optimizer = tf.train.AdamOptimizer(0.01) self.train = optimizer.minimize(elbo_loss) # initialize the tensorflow graph and get initial values of the weights init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) init_op.run()
def model_fn(self, features, labels, mode): """Auto-Scaling 3D CNN model. For more information on how to write a model function, see: https://www.tensorflow.org/guide/custom_estimators#write_a_model_function """ input_layer = features # Replace missing values by 0 hidden_layer = tf.where(tf.is_nan(input_layer), tf.zeros_like(input_layer), input_layer) # Repeatedly apply 3D CNN, followed by 3D max pooling # until the hidden layer has reasonable number of entries REASONABLE_NUM_ENTRIES = 1000 num_filters = 16 # The number of filters is fixed while True: shape = hidden_layer.shape kernel_size = [ min(3, shape[1]), min(3, shape[2]), min(3, shape[3]) ] hidden_layer = tf.layers.conv3d(inputs=hidden_layer, filters=num_filters, kernel_size=kernel_size) pool_size = [min(2, shape[1]), min(2, shape[2]), min(2, shape[3])] hidden_layer = tf.layers.max_pooling3d(inputs=hidden_layer, pool_size=pool_size, strides=pool_size, padding='valid', data_format='channels_last') if get_num_entries(hidden_layer) < REASONABLE_NUM_ENTRIES: break hidden_layer = tf.layers.flatten(hidden_layer) hidden_layer = tf.layers.dense(inputs=hidden_layer, units=64, activation=tf.nn.relu) hidden_layer = tf.layers.dropout( inputs=hidden_layer, rate=0.15, training=mode == tf.estimator.ModeKeys.TRAIN) logits = tf.layers.dense(inputs=hidden_layer, units=self.output_dim) sigmoid_tensor = tf.nn.sigmoid(logits, name="sigmoid_tensor") predictions = { # Generate predictions (for PREDICT and EVAL mode) "classes": tf.argmax(input=logits, axis=1), # "classes": binary_predictions, # Add `sigmoid_tensor` to the graph. It is used for PREDICT and by the # `logging_hook`. "probabilities": sigmoid_tensor } if mode == tf.estimator.ModeKeys.PREDICT: return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) # Calculate Loss (for both TRAIN and EVAL modes) # For multi-label classification, a correct loss is sigmoid cross entropy loss = sigmoid_cross_entropy_with_logits(labels=labels, logits=logits) # Configure the Training Op (for TRAIN mode) if mode == tf.estimator.ModeKeys.TRAIN: optimizer = tf.train.AdamOptimizer() train_op = optimizer.minimize( loss=loss, global_step=tf.train.get_global_step()) return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op) # Add evaluation metrics (for EVAL mode) assert mode == tf.estimator.ModeKeys.EVAL eval_metric_ops = { "accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"]) } return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
def __init__(self,is_training,params): self.batch_size = batch_size = params["batch_size"] self.num_steps = num_steps = params["seq_length"] self._Y_vals=[] size = params['n_hidden'] input_size = params['input_size'] keep_prob=params['keep_prob'] max_grad_norm=params['max_grad_norm'] self._input_data = tf.placeholder(tf.float32, [batch_size, num_steps,input_size]) self._targets = tf.placeholder(tf.float32, [batch_size*num_steps,params["n_output"]]) self._zeros=tf.zeros([batch_size*num_steps,params["n_output"]],tf.float32) lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=size,input_size=input_size) if is_training and keep_prob < 1: lstm_cell = tf.nn.rnn_cell.DropoutWrapper( lstm_cell, output_keep_prob=keep_prob) lstm_cell2 = tf.nn.rnn_cell.LSTMCell(num_units=size,input_size=size) if is_training and keep_prob < 1: lstm_cell2 = tf.nn.rnn_cell.DropoutWrapper( lstm_cell2, output_keep_prob=keep_prob) lstm_cell3 = tf.nn.rnn_cell.LSTMCell(num_units=size,input_size=size) if is_training and keep_prob < 1: lstm_cell3 = tf.nn.rnn_cell.DropoutWrapper( lstm_cell3, output_keep_prob=keep_prob) cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell,lstm_cell2,lstm_cell3]) self._initial_state = cell.zero_state(batch_size, tf.float32) if is_training and keep_prob < 1: self._input_data = tf.nn.dropout(self._input_data, keep_prob) outputs = [] state = self._initial_state with tf.variable_scope("RNN"): for time_step in range(num_steps): if time_step > 0: tf.get_variable_scope().reuse_variables() (cell_output, state) = cell(self._input_data[:,time_step,:], state) outputs.append(cell_output) output = tf.reshape(tf.concat(1, outputs), [-1, size]) softmax_w = tf.get_variable("softmax_w", [size, params["n_output"]]) softmax_b = tf.get_variable("softmax_b", [params["n_output"]]) self._Y_vals = tf.tanh(tf.matmul(output, softmax_w) + softmax_b) tmp = self._Y_vals - self._targets tmpt=tf.select(tf.is_nan(tmp),self._zeros,tmp) loss= tf.nn.l2_loss(tmpt) self._cost = cost = tf.reduce_mean(loss) self._final_state = state self._tvars = tf.trainable_variables() if not is_training: return self._lr = tf.Variable(0.0, trainable=False) grads, _ = tf.clip_by_global_norm(tf.gradients(cost, self._tvars),max_grad_norm) optimizer = tf.train.AdamOptimizer(self.lr) self._train_op = optimizer.apply_gradients(zip(grads, self._tvars))
def SanitizedAutoCorrelation(x, axis, *args, **kwargs):
  res = tfd.auto_correlation(x, axis, *args, **kwargs)
  res = tf.where(tf.is_nan(res), tf.ones_like(res), res)
  res = tf.where(tf.is_inf(res), tf.ones_like(res), res)
  return res
def gradient_descent(self, sess, model): def compare(x, y): if self.TARGETED: return x == y else: return x != y shape = (BATCH_SIZE, model.image_size, model.image_size, model.num_channels) # the variable to optimize over modifier = tf.Variable(np.zeros(shape, dtype=np.float32)) canchange = tf.Variable(np.zeros(shape), dtype=np.float32) # the variables we're going to hold, use for efficiency simg = tf.Variable(np.zeros(shape, dtype=np.float32)) original = tf.Variable(np.zeros(shape, dtype=np.float32)) timg = tf.Variable(np.zeros(shape, dtype=np.float32)) tlab = tf.Variable( np.zeros((BATCH_SIZE, model.num_labels), dtype=np.float32)) # and the assignment to set the variables assign_simg = tf.placeholder(np.float32, shape) assign_original = tf.placeholder(np.float32, shape) assign_timg = tf.placeholder(np.float32, shape) assign_tlab = tf.placeholder(np.float32, (BATCH_SIZE, self.model.num_labels)) # these are the variables to initialize when we run setup = [] setup.append(tf.assign(timg, assign_timg)) setup.append(tf.assign(original, assign_original)) setup.append(tf.assign(simg, assign_simg)) setup.append(tf.assign(tlab, assign_tlab)) newimg = (tf.tanh(modifier + simg) / 2) * self.norm_to_01( canchange) + (1 - self.norm_to_01(canchange)) * original Initnewimg = newimg Initnewimg = tf.clip_by_value((Initnewimg + 0.5) * 255., 0., 255.) Initnewimg = Initnewimg / 255. - 0.5 Initoutput = model.predict(Initnewimg) Initreal = tf.reduce_sum((tlab) * Initoutput, 1) Initother = tf.reduce_max((1 - tlab) * Initoutput - (tlab * 10000), 1) if self.TARGETED: Initloss1 = tf.maximum(0.0, Initother - Initreal + .01) else: Initloss1 = tf.maximum(0.0, Initreal - Initother + .01) # sum up the losses Initloss_sbin = self.L_0loss(self.norm_to_01(canchange), 10.) Initloss_midbin = tf.where(tf.is_nan(Initloss_sbin), tf.zeros_like(Initloss_sbin), Initloss_sbin) Initloss_sbin = tf.where( tf.is_nan(Initloss_sbin), tf.zeros_like(Initloss_sbin) + tf.reduce_mean(Initloss_midbin), Initloss_sbin) Initloss_bin = Initloss_sbin # tf.reduce_mean(Initloss_sbin)# Initloss_smod = self.L_0loss( (tf.tanh(modifier + simg) / 2 - tf.tanh(timg) / 2), 10.) Initloss_midbin = tf.where(tf.is_nan(Initloss_smod), tf.zeros_like(Initloss_smod), Initloss_smod) Initloss_smod = tf.where( tf.is_nan(Initloss_smod), tf.zeros_like(Initloss_smod) + tf.reduce_mean(Initloss_midbin), Initloss_smod) Initloss_mod = Initloss_smod Initloss = 10. * Initloss1 + 0.5 * Initloss_bin + 0.5 * Initloss_mod # 0.2 for mnist, 0.5 for cifar10 # setup the adam optimizer and keep track of variables we're creating start_vars = set(x.name for x in tf.global_variables()) optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE) Inittrain = optimizer.minimize(Initloss, var_list=[modifier, canchange]) end_vars = tf.global_variables() new_vars = [x for x in end_vars if x.name not in start_vars] init = tf.variables_initializer( var_list=[modifier, canchange, simg, original, timg, tlab] + new_vars) def doit(oimgs, labs, starts): # convert to tanh-space imgs = np.arctanh(np.array(oimgs) * 1.999999) starts = np.arctanh(np.array(starts) * 1.999999) # initialize the variables sess.run(init) sess.run( setup, { assign_timg: imgs, assign_tlab: labs, assign_simg: starts, assign_original: oimgs }) old_nimg = None old_Equal_count = old_Initloss_b = 1000. 
for step in range(self.MAX_ITERATIONS): # remember the old value _, works, ploss, qloss, Initloss_b, Initloss_m = sess.run([ Inittrain, Initloss1, Initoutput, tlab, Initloss_sbin, Initloss_mod ]) #print("works",works) #print("Initloss_b",Initloss_b) #print("Initloss_m",Initloss_m) if self.TARGETED: Flag = np.argmax(ploss, 1) == np.argmax(np.squeeze(qloss)) if np.sum(Flag) >= 1: op_index = np.argmin(Initloss_b * Flag) nimg = sess.run((Initnewimg)) if Initloss_b[op_index] < old_Initloss_b: old_nimg = nimg[op_index] old_Initloss_b = Initloss_b[op_index] else: if np.argmax(ploss) != np.argmax(np.squeeze(qloss)): nimg = sess.run((Initnewimg)) cal_img = np.around( np.clip((np.array(oimgs) + 0.5) * 255., 0., 255.)) cal_nimg = np.around( np.clip((np.array(nimg) + 0.5) * 255., 0., 255.)) Equal_count = np.sum( np.all(np.abs(cal_img - cal_nimg) > 1, axis=3), (1, 2)) if Equal_count < old_Equal_count: old_Equal_count = Equal_count old_nimg = nimg if old_nimg is not None: init_input = np.expand_dims(oimgs[0], 0) cal_img = np.around( np.clip((init_input + 0.5) * 255., 0., 255.)) cal_nimg = np.around( np.clip((np.array(old_nimg) + 0.5) * 255., 0., 255.)) Equal_count = np.sum(np.abs(cal_img - cal_nimg) > 1.) #print("Equal count:", np.sum(np.all(np.sum(np.abs(cal_img - cal_nimg), 0) > 1, axis=2))) return Equal_count, old_nimg else: return None, None return doit
def checkForNan(tensor):
    return tf.reduce_sum(tf.add(tf.to_float(tf.is_nan(tensor)),
                                tf.to_float(tf.is_inf(tensor))))
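A quick usage sketch (TF 1.x): the helper counts how many entries are NaN or Inf, so a result of 0.0 means the tensor is clean.

import numpy as np
import tensorflow as tf

t = tf.constant([1.0, np.nan, np.inf, 2.0])
with tf.Session() as sess:
    print(sess.run(checkForNan(t)))  # 2.0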
def project_dual(self): """Function that projects the input dual variables onto the feasible set. Returns: projected_dual: Feasible dual solution corresponding to current dual projected_certificate: Objective value of feasible dual """ # TODO: consider whether we can use shallow copy of the lists without # using tf.identity projected_lambda_pos = [tf.identity(x) for x in self.lambda_pos] projected_lambda_neg = [tf.identity(x) for x in self.lambda_neg] projected_lambda_quad = [tf.identity(x) for x in self.lambda_quad] projected_lambda_lu = [tf.identity(x) for x in self.lambda_lu] projected_nu = tf.identity(self.nu) # TODO: get rid of the special case for one hidden layer # Different projection for 1 hidden layer if self.nn_params.num_hidden_layers == 1: # Creating equivalent PSD matrix for H by Schur complements diag_entries = 0.5 * tf.divide( tf.square(self.lambda_quad[self.nn_params.num_hidden_layers]), (self.lambda_quad[self.nn_params.num_hidden_layers] + self.lambda_lu[self.nn_params.num_hidden_layers])) # If lambda_quad[i], lambda_lu[i] are 0, entry is NaN currently, # but we want to set that to 0 diag_entries = tf.where(tf.is_nan(diag_entries), tf.zeros_like(diag_entries), diag_entries) matrix = ( tf.matmul(tf.matmul(tf.transpose( self.nn_params.weights[self.nn_params.num_hidden_layers - 1]), utils.diag(diag_entries)), self.nn_params.weights[self.nn_params.num_hidden_layers - 1])) new_matrix = utils.diag( 2 * self.lambda_lu[self.nn_params.num_hidden_layers - 1]) - matrix # Making symmetric new_matrix = 0.5 * (new_matrix + tf.transpose(new_matrix)) eig_vals = tf.self_adjoint_eigvals(new_matrix) min_eig = tf.reduce_min(eig_vals) # If min_eig is positive, already feasible, so don't add # Otherwise add to make PSD [1E-6 is for ensuring strictly PSD (useful # while inverting) projected_lambda_lu[0] = (projected_lambda_lu[0] + 0.5 * tf.maximum(-min_eig, 0) + 1E-6) else: # Minimum eigen value of H # TODO: Write this in terms of matrix multiply # matrix H is a submatrix of M, thus we just need to extend existing code # for computing matrix-vector product (see get_psd_product function). # Then use the same trick to compute smallest eigenvalue. eig_vals = tf.self_adjoint_eigvals(self.matrix_h) min_eig = tf.reduce_min(eig_vals) for i in range(self.nn_params.num_hidden_layers + 1): # Since lambda_lu appears only in diagonal terms, can subtract to # make PSD and feasible projected_lambda_lu[i] = (projected_lambda_lu[i] + 0.5 * tf.maximum(-min_eig, 0) + 1E-6) # Adjusting lambda_neg wherever possible so that lambda_neg + lambda_lu # remains close to unchanged # projected_lambda_neg[i] = tf.maximum(0.0, projected_lambda_neg[i] + # (0.5*min_eig - 1E-6)* # (self.lower[i] + self.upper[i])) projected_dual_var = {'lambda_pos': projected_lambda_pos, 'lambda_neg': projected_lambda_neg, 'lambda_lu': projected_lambda_lu, 'lambda_quad': projected_lambda_quad, 'nu': projected_nu} projected_dual_object = DualFormulation(projected_dual_var, self.nn_params, self.test_input, self.true_class, self.adv_class, self.input_minval, self.input_maxval, self.epsilon) projected_certificate = projected_dual_object.compute_certificate() return projected_certificate
def build_loss(self, objective):
    """
    Adds elements into the graph to compute the given objective.

    Parameters
    ----------
    objective : dict of {:class:`~nengo:nengo.Probe`: ``"mse"`` or \
                         callable or ``None``}
        The objective used to compute loss for each probe. Passing
        ``"mse"`` will use mean squared error. A custom function
        ``f(output, target) -> loss`` can be passed that consumes the
        actual output and target output for a probe in ``targets`` and
        returns a ``tf.Tensor`` representing the scalar loss value for
        that Probe (loss will be summed across Probes).

    Returns
    -------
    ``tf.Tensor``
        Tensor representing the sum of the given objectives applied to
        target probes
    """

    key = frozenset(objective.items())

    try:
        # return the cached loss tensor if it exists
        return self.losses[key]
    except KeyError:
        pass

    loss = []
    for p, obj in objective.items():
        # create a placeholder for the target values
        if p not in self.target_phs:
            self.target_phs[p] = tf.placeholder(
                self.dtype, (self.minibatch_size, None, p.size_in),
                name="targets")

        # compute loss
        if obj == "mse":
            # note: nan targets converted to zero error
            target = tf.where(tf.is_nan(self.target_phs[p]),
                              self.probe_arrays[p],
                              self.target_phs[p])

            loss.append(
                tf.reduce_mean(tf.square(target - self.probe_arrays[p])))
        elif callable(obj):
            # move minibatch dimension back to the front
            loss.append(obj(self.probe_arrays[p], self.target_phs[p]))
        elif obj is None:
            # user is directly specifying error, not using objective
            continue
        else:
            raise NotImplementedError

    if len(loss) > 0:
        # sum loss across probes (note: this will also sum across
        # the output of `objective` if it doesn't return a scalar)
        loss = tf.reduce_sum(loss)
    else:
        loss = None

    self.losses[key] = loss

    return loss
tf.summary.histogram("loss", loss) merge = tf.summary.merge_all() hm_steps = 25000 sess.run(tf.global_variables_initializer()) input_size = height for batch in shuffle(batch_size, input_size): step, Xp, Y1p, Y2p = batch if step == 0: time.sleep(1) continue debugger = tf.logical_or(tf.is_nan(loss), tf.is_inf(loss)) while (1): d, l = sess.run([debugger, loss], feed_dict = {X:Xp, Y1:Y1p, Y2:Y2p, "YOLO/dropout:0" = 0.5}) if (not d): break else: print("Re-random variables!") sess.run(tf.global_variables_initializer()) summary, _ , lossp, lxy, lwh, lobj, lnoobj, lp = sess.run([merge, trainer, loss, loss_xy, loss_wh, loss_obj, loss_noobj, loss_p], feed_dict = {X: Xp, Y1: Y1p, Y2:Y2p, "YOLO/dropout:0" = 0.5}) print("""Step {} : loss {} loss_xy = {} loss_wh = {} loss_obj = {} loss_noobj = {}
def ScaleGradients(self, var_grads, gradient_adjuster=None): """Scales gradients according to training params. Args: var_grads: a `.NestedMap` whose values are (var, grad) pairs. gradient_adjuster: if not None, a function that mutates a given var_grads. Returns: A `.NestedMap` containing: - has_nan_or_inf: a scalar of 0 or 1, indicating whether there is any NaN or Inf in input gradients. - final_var_grads: a `.NestedMap` whose values are (var, grad) pairs, where gradients have already been scaled. - grad_scale: the gradient scale. 0 if gradient updates should be skipped for the step. (Optional, only returned in case global norm clipping is used.) """ p = self.params # Computes gradients' norm and adds their summaries. Note that all_grad_norm # may be nan, which may cause grad_scale to be nan. for name, vg in var_grads.FlattenItems(): summary_utils.AddNormSummary(name + '/' + p.name, py_utils.NestedMap(s=vg)) all_grad_norm = tf.sqrt( py_utils.SumSquared([ g for (_, g) in py_utils.NestedMap(child=var_grads).Flatten() ])) all_var_norm = tf.sqrt( py_utils.SumSquared([ v for (v, _) in py_utils.NestedMap(child=var_grads).Flatten() ])) grad_norm_is_nan_or_inf = tf.logical_or(tf.is_nan(all_grad_norm), tf.is_inf(all_grad_norm)) # Optional gradient adjustment. Note that this happens after computing # all_grad_norm. if gradient_adjuster is not None: tf.logging.info('gradient_adjuster=%s', gradient_adjuster) var_grads = gradient_adjuster(var_grads) # Handles NaN/Inf gradients. has_nan_or_inf = py_utils.HasNanOrInfGradient(var_grads) # Grad norm can still be inf even if none of the individual grad is inf. has_nan_or_inf = tf.logical_or(has_nan_or_inf, grad_norm_is_nan_or_inf) return_values = py_utils.NestedMap() if p.clip_gradient_single_norm_to_value: # Currently using both types of clipping simultaneously is unsupported. if p.clip_gradient_norm_to_value: raise ValueError( 'Cannot use clip_gradient_single_norm_to_value=%f and ' 'clip_gradient_norm_to_value=%f.' % (p.clip_gradient_single_norm_to_value, p.clip_gradient_norm_to_value)) final_var_grads = py_utils.ApplyGradNormCliping( var_grads, p.clip_gradient_single_norm_to_value) else: grad_scale = self._GetGlobalGradScale(all_grad_norm, has_nan_or_inf) self._AddEvalMetric('grad_norm/all', all_grad_norm, tf.constant(1.0)) self._AddEvalMetric('var_norm/all', all_var_norm, tf.constant(1.0)) self._AddEvalMetric('grad_scale_all', grad_scale, tf.constant(1.0)) final_var_grads = py_utils.ApplyGradMultiplier( var_grads, grad_scale) return_values.grad_scale = grad_scale return_values.has_nan_or_inf = has_nan_or_inf return_values.final_var_grads = final_var_grads return return_values
def angular_symmetry(self, atom_matrix): """ Generate radial basis functions given an atom_matrix consisting of the atom types and coordinates. Parameters ---------- atom_matrix: tf.Tensor An atom matrix of shape (None, 4), where rank 0 determines the number of atoms and rank 1 consists of (t, x, y, z) such that t is a compacted atomic number. Returns ------- tf.Tensor Featurized representation of shape (num_atoms, len(sym.A_Rs)*len(sym.A_thetas)*sym.max_atom_types*(sym.max_atom_types+1)/2) """ num_atoms = tf.shape(atom_matrix)[0] atom_idxs = tf.range(tf.shape(atom_matrix)[0]) atom_types = tf.cast(atom_matrix[:, 0], dtype=tf.int32) atom_coords = atom_matrix[:, 1:] # atom_coords shape: (num_atoms, 3) type_groups_idxs = tf.dynamic_partition(atom_idxs, atom_types, self.max_atom_types, name="dp_angular") lookup = np.array([[[0, 3]],[[2,3]],[[5,3]]]) angular_features = [] for type_a in range(self.max_atom_types): j_idxs = type_groups_idxs[type_a] for type_b in range(type_a, self.max_atom_types): k_idxs = type_groups_idxs[type_b] tile_a = tf.tile(tf.expand_dims(j_idxs, 1), [1, tf.shape(k_idxs)[0]], name="tile_outer1") tile_a = tf.expand_dims(tile_a, 2) tile_b = tf.tile(tf.expand_dims(k_idxs, 0), [tf.shape(j_idxs)[0], 1], name="tile_outer2") tile_b = tf.expand_dims(tile_b, 2) cartesian_product = tf.concat([tile_a, tile_b], axis=2) # int64s? group_coords = tf.nn.embedding_lookup(atom_coords, cartesian_product) # shape: (len(type_a), len(type_b), 2, 3) delta_jk = group_coords[:, :, 0, :] - group_coords[:, :, 1, :] R_jk = tf.norm(delta_jk, axis=-1) dist_vec = tf.reshape(atom_coords, (-1, 1, 1, 1, 3)) # shape (6, 3, 3, 2, 3), vector difference deltas = group_coords - dist_vec # shape: (num_atoms, len(type_a), len(type_b), 2, 3) delta_ij = deltas[:, :, :, 0, :] delta_ik = deltas[:, :, :, 1, :] # LHS computation denom = tf.multiply(tf.norm(delta_ij, axis=-1), (tf.norm(delta_ik, axis=-1))) # dot = tf.reduce_sum(tf.multiply(delta_ij, delta_ik), axis=-1) theta_ijk = tf.acos(dot / denom) # if i=j || j=k then NaN lhs = tf.pow(1 + tf.cos(tf.expand_dims(theta_ijk, -1) - tf.reshape(self.A_thetas, (1, 1, 1, -1))), self.A_zeta) lhs = tf.where(tf.is_nan(lhs), tf.zeros_like(lhs), lhs) # clean up nans numerically, the real zeroing happens later lhs = tf.where(tf.is_inf(lhs), tf.zeros_like(lhs), lhs) # clean up infs numerically, the real zeroing happens later # RHS computation R_ij_ik = tf.norm(deltas, axis=-1) # shape (6, 3, 3, 2), norm distance f_C_true = 0.5*tf.cos(tf.div(np.pi * R_ij_ik, self.A_Rc)) + 0.5 # TODO: refactor with radial code? 
f_C_flags = tf.nn.relu(tf.sign(self.A_Rc - R_ij_ik)) # 1 if within cutoff, 0 otherwise f_C_R_ij_ik = f_C_true * f_C_flags # note: element wise multiply fCRi_fCRj = tf.multiply(f_C_R_ij_ik[:, :, :, 0], f_C_R_ij_ik[:, :, :, 1]) R_ij = R_ij_ik[:, :, :, 0] R_ik = R_ij_ik[:, :, :, 1] inner = tf.expand_dims((R_ij + R_ik) / 2.0, -1) - tf.reshape(self.A_Rs, (1, 1, 1, -1)) rhs = tf.exp(-self.A_eta*tf.pow(inner, 2)) * tf.expand_dims(fCRi_fCRj, -1) # lhs shape: [num_atoms, len(type_a), len(type_b), len(A_thetas)] # rhs shape: [num_atoms, len(type_a), len(type_b), len(A_Rs)] lhs = tf.expand_dims(lhs, axis=3) rhs = tf.expand_dims(rhs, axis=4) summand = tf.multiply(lhs, rhs) # (num_atoms, len(type_a), len(type_b), len(A_Rs), len(A_thetas)) # zero-out/fix summand elements where i == j || j == k || i == k # we store a triplet of shape # (num_atoms, len(type_a), len(type_b), 3) where 3 is the distance of ij, ik, and jk respectively # R_ij shape: (num_atoms, len(type_a), len(type_b)) # R_ik shape: (num_atoms, len(type_a), len(type_b)) R_jk = tf.tile(tf.expand_dims(R_jk, axis=0), [num_atoms, 1, 1], name="tile_inner") R_ijk = tf.stack([R_ij, R_ik, R_jk], axis=-1) # R_jk shape: (len(type_a), len(type_b)) # We want to form R_ijk of shape (num_atoms, len(type_a), len(type_b), 3) min_dists = tf.reduce_min(R_ijk, axis=-1) keep_flags = tf.nn.relu(tf.sign(tf.abs(min_dists) - 1e-7)) keep_flags = tf.expand_dims(keep_flags, -1) keep_flags = tf.expand_dims(keep_flags, -1) summand = tf.multiply(summand, keep_flags) result = tf.multiply(tf.pow(np.float32(2.0), 1-self.A_zeta), tf.reduce_sum(summand, [1,2])) result = tf.reshape(result, (num_atoms, len(self.A_thetas)*len(self.A_Rs))) angular_features.append(result) angular_features = tf.concat(angular_features, axis=1) angular_features = tf.reshape(angular_features, (num_atoms, self.angular_feature_size())) # ravel return angular_features
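# Small, self-contained illustration (TensorFlow 1.x) of why the tf.is_nan cleanup in
# angular_symmetry is needed: when delta_ij or delta_ik is a zero vector the cosine
# argument is 0/0, tf.acos then yields NaN, and that NaN would otherwise propagate into lhs.
import tensorflow as tf

cos_theta = tf.constant([1.0, 0.0]) / tf.constant([1.0, 0.0])    # second entry is 0/0 -> NaN
theta = tf.acos(cos_theta)                                       # [0.0, nan]
theta = tf.where(tf.is_nan(theta), tf.zeros_like(theta), theta)  # [0.0, 0.0]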
def is_nan(self, sess, feed_dict={}):
    return sess.run(tf.is_nan(self.tensor), feed_dict)
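# Note that the helper above adds a fresh tf.is_nan op to the default graph on every call.
# A hedged alternative sketch, assuming the surrounding class exposes the tensor as
# self.tensor (the wrapper name and _is_nan_op attribute are hypothetical): build the op
# once at construction time and reuse it.
class NanCheckedTensor(object):
    def __init__(self, tensor):
        self.tensor = tensor
        self._is_nan_op = tf.is_nan(tensor)  # built once at graph-construction time

    def is_nan(self, sess, feed_dict=None):
        return sess.run(self._is_nan_op, feed_dict=feed_dict or {})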
def model_fn(self, features, labels, mode): """Auto-Scaling 3D CNN model. For more information on how to write a model function, see: https://www.tensorflow.org/guide/custom_estimators#write_a_model_function """ input_layer = features * 2.0 - 1.0 # Replace missing values by 0 hidden_layer = tf.where(tf.is_nan(input_layer), tf.zeros_like(input_layer), input_layer) ### kernel_size = [1, 3, 3] hidden_layer_0_0 = self.conv3d_with_batchnorm( features=hidden_layer, depth=16, kernel_size=kernel_size, padding='same', mode=mode == tf.estimator.ModeKeys.TRAIN, layer_order=0) hidden_layer_0_1 = self.conv3d_with_batchnorm( features=hidden_layer, depth=16, kernel_size=[1, 1, 1], padding='same', mode=mode == tf.estimator.ModeKeys.TRAIN, layer_order=1) hideen_layer = tf.concat([hidden_layer_0_0, hidden_layer_0_1], axis=4) ### # Repeatedly apply 3D CNN, followed by 3D max pooling # until the hidden layer has reasonable number of entries REASONABLE_NUM_ENTRIES = 1000 num_filters = 16 # The number of filters is fixed i = 2 while True: shape = hidden_layer.shape kernel_size = [ min(3, shape[1]), min(3, shape[2]), min(3, shape[3]) ] hidden_layer_2 = tf.layers.conv3d(inputs=hidden_layer, filters=num_filters, kernel_size=kernel_size, kernel_initializer=initializer, use_bias=False, padding='same') hidden_layer_2 = tf.layers.batch_normalization( inputs=hidden_layer_2, momentum=0.99, training=mode == tf.estimator.ModeKeys.TRAIN) hidden_layer_2 = prelu(hidden_layer_2, i) i += 1 hidden_layer = tf.concat([hidden_layer, hidden_layer_2], axis=4) pool_size = [min(2, shape[1]), min(2, shape[2]), min(2, shape[3])] hidden_layer = tf.layers.max_pooling3d(inputs=hidden_layer, pool_size=pool_size, strides=pool_size, padding='valid', data_format='channels_last') if get_num_entries(hidden_layer) < REASONABLE_NUM_ENTRIES: break hidden_layer = tf.layers.flatten(hidden_layer) hidden_layer = tf.layers.dense( inputs=hidden_layer, units=1024, kernel_initializer=initializer, use_bias=False, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001) #activation=tf.nn.relu ) hidden_layer = tf.layers.batch_normalization( inputs=hidden_layer, momentum=0.99, training=mode == tf.estimator.ModeKeys.TRAIN) hidden_layer = tf.nn.relu(hidden_layer) hidden_layer = tf.layers.dropout( inputs=hidden_layer, rate=0.2, training=mode == tf.estimator.ModeKeys.TRAIN) logits = tf.layers.dense( inputs=hidden_layer, units=self.output_dim, kernel_initializer=initializer, bias_initializer=initializer, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001), bias_regularizer=tf.contrib.layers.l2_regularizer(0.001)) sigmoid_tensor = tf.nn.sigmoid(logits, name="sigmoid_tensor") predictions = { # Generate predictions (for PREDICT and EVAL mode) "classes": tf.argmax(input=logits, axis=1), # "classes": binary_predictions, # Add `sigmoid_tensor` to the graph. It is used for PREDICT and by the # `logging_hook`. 
"probabilities": sigmoid_tensor } if mode == tf.estimator.ModeKeys.PREDICT: return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) # Calculate Loss (for both TRAIN and EVAL modes) # For multi-label classification, a correct loss is sigmoid cross entropy loss = sigmoid_cross_entropy_with_logits(labels=labels, logits=logits) # Configure the Training Op (for TRAIN mode) if mode == tf.estimator.ModeKeys.TRAIN: update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) optimizer = tf.train.AdamOptimizer(learning_rate=0.001) #optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001) train_op = optimizer.minimize( loss=loss, global_step=tf.train.get_global_step()) train_op = tf.group([train_op, update_ops]) return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op) # Add evaluation metrics (for EVAL mode) assert mode == tf.estimator.ModeKeys.EVAL eval_metric_ops = { "accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"]) } return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0.1, print_loss=False, normalize=True):
    # There are three feature layers in total.
    num_layers = len(anchors) // 3
    # Separate the predictions from the ground truth; args is [*model_body.output, *y_true].
    # y_true is a list of three feature layers with shapes (m,13,13,3,85), (m,26,26,3,85), (m,52,52,3,85).
    # yolo_outputs is a list of three feature layers with shapes (m,13,13,3,85), (m,26,26,3,85), (m,52,52,3,85).
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]
    # The 13x13 feature layer uses anchors [142, 110], [192, 243], [459, 401].
    # The 26x26 feature layer uses anchors [36, 75], [76, 55], [72, 146].
    # The 52x52 feature layer uses anchors [12, 16], [19, 36], [40, 28].
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    # Get input_shape, i.e. (416, 416).
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))

    loss = 0
    num_pos = 0
    # Process each image; m is the batch_size.
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # Taking the first feature layer (m,13,13,3,85) as an example:
        # take the positions in this layer that contain an object, shape (m,13,13,3,1).
        object_mask = y_true[l][..., 4:5]
        # Take the corresponding class probabilities, shape (m,13,13,3,80).
        true_class_probs = y_true[l][..., 5:]
        if label_smoothing:
            true_class_probs = _smooth_labels(true_class_probs, label_smoothing)

        # Process the output of this feature layer and obtain four return values:
        #   grid     (13,13,1,2)    grid coordinates
        #   raw_pred (m,13,13,3,85) raw, unprocessed predictions
        #   pred_xy  (m,13,13,3,2)  decoded center coordinates
        #   pred_wh  (m,13,13,3,2)  decoded width and height
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l], anchors[anchor_mask[l]], num_classes,
                                                     input_shape, calc_loss=True)
        # pred_box is the decoded position of the predicted box, shape (m,13,13,3,4).
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Find the group of negative samples; the first step is to create an array, [].
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        # Compute ignore_mask for each image.
        def loop_body(b, ignore_mask):
            # Take out the n ground-truth boxes: (n, 4).
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            # Compute the IoU between predicted and ground-truth boxes:
            #   pred_box (13,13,3,4)  coordinates of the predicted boxes
            #   true_box (n,4)        coordinates of the ground-truth boxes
            #   iou      (13,13,3,n)  IoU between predicted and ground-truth boxes
            iou = box_iou(pred_box[b], true_box)
            # best_iou (13,13,3): maximum overlap of each feature point with any ground-truth box.
            best_iou = K.max(iou, axis=-1)
            # If the maximum IoU between a predicted box and the ground truth is below ignore_thresh,
            # the predicted box is treated as having no matching ground-truth box.
            # The purpose is to ignore feature points whose predictions already match a ground-truth
            # box very well; such boxes are already fairly accurate and are not suitable as negative
            # samples, so they are ignored.
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # Loop here; the loop runs over each image.
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])

        # ignore_mask selects the feature points used as negative samples, shape (m,13,13,3).
        ignore_mask = ignore_mask.stack()
        # (m,13,13,3,1)
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # The larger the ground-truth box, the smaller its weight; small boxes get a larger weight.
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Compute the CIoU loss.
        raw_true_box = y_true[l][..., 0:4]
        ciou = box_ciou(pred_box, raw_true_box)
        ciou_loss = object_mask * box_loss_scale * (1 - ciou)

        # If a location contains a box, compute the cross entropy between 1 and the confidence;
        # if it does not, compute the cross entropy between 0 and the confidence.
        # Part of the samples are ignored in the negative term, namely those whose best_iou exceeds
        # ignore_thresh: their predictions already match a ground-truth box well, so they are not
        # suitable as negative samples and are therefore ignored.
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[..., 5:], from_logits=True)

        location_loss = K.sum(tf.where(tf.is_nan(ciou_loss), tf.zeros_like(ciou_loss), ciou_loss))
        confidence_loss = K.sum(tf.where(tf.is_nan(confidence_loss), tf.zeros_like(confidence_loss), confidence_loss))
        class_loss = K.sum(tf.where(tf.is_nan(class_loss), tf.zeros_like(class_loss), class_loss))

        # Count the number of positive samples.
        num_pos += tf.maximum(K.sum(K.cast(object_mask, tf.float32)), 1)
        loss += location_loss + confidence_loss + class_loss
        # if print_loss:
        #     loss = tf.Print(loss, [loss, location_loss, confidence_loss, class_loss, K.sum(ignore_mask)], message='loss: ')

    if normalize:
        loss = loss / num_pos
    else:
        loss = loss / mf
    return loss
def dice_loss(logits, labels, num_classes, smooth=1e-5, include_background=True, only_present=False): """Calculates a smooth Dice coefficient loss from sparse labels. Args: logits (tf.Tensor): logits prediction for which to calculate crossentropy error labels (tf.Tensor): sparse labels used for crossentropy error calculation num_classes (int): number of class labels to evaluate on smooth (float): smoothing coefficient for the loss computation include_background (bool): flag to include a loss on the background label or not only_present (bool): flag to include only labels present in the inputs or not Returns: tf.Tensor: Tensor scalar representing the loss """ # Get a softmax probability of the logits predictions and a one hot # encoding of the labels tensor probs = tf.nn.softmax(logits) onehot_labels = tf.one_hot( indices=labels, depth=num_classes, dtype=tf.float32, name='onehot_labels') # Compute the Dice similarity coefficient label_sum = tf.reduce_sum(onehot_labels, axis=[1, 2, 3], name='label_sum') pred_sum = tf.reduce_sum(probs, axis=[1, 2, 3], name='pred_sum') intersection = tf.reduce_sum(onehot_labels * probs, axis=[1, 2, 3], name='intersection') per_sample_per_class_dice = (2. * intersection + smooth) per_sample_per_class_dice /= (label_sum + pred_sum + smooth) # Include or exclude the background label for the computation if include_background: flat_per_sample_per_class_dice = tf.reshape( per_sample_per_class_dice, (-1, )) flat_label = tf.reshape(label_sum, (-1, )) else: flat_per_sample_per_class_dice = tf.reshape( per_sample_per_class_dice[:, 1:], (-1, )) flat_label = tf.reshape(label_sum[:, 1:], (-1, )) # Include or exclude non-present labels for the computation if only_present: masked_dice = tf.boolean_mask(flat_per_sample_per_class_dice, tf.logical_not(tf.equal(flat_label, 0))) else: masked_dice = tf.boolean_mask( flat_per_sample_per_class_dice, tf.logical_not(tf.is_nan(flat_per_sample_per_class_dice))) dice = tf.reduce_mean(masked_dice) loss = 1. - dice return loss
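# A hedged usage sketch for dice_loss above. The shapes are assumptions inferred from the
# axis=[1, 2, 3] reductions: logits [batch, x, y, z, num_classes] and sparse labels
# [batch, x, y, z]; the random tensors simply stand in for real network output.
logits = tf.random_normal([2, 8, 8, 8, 4])                          # 4-class 3D segmentation logits
labels = tf.random_uniform([2, 8, 8, 8], maxval=4, dtype=tf.int32)  # sparse integer labels
loss = dice_loss(logits, labels, num_classes=4,
                 include_background=False, only_present=True)       # scalar loss in [0, 1]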
def build_model(self): Z = tf.placeholder(tf.float32, [self.batch_size, self.dim_z]) Y = tf.placeholder(tf.float32, [self.batch_size, self.dim_y]) image_real = tf.placeholder(tf.float32, [self.batch_size] + self.image_shape) pred_high = tf.placeholder(tf.float32, [self.batch_size] + self.image_shape) pred_low = tf.placeholder(tf.float32, [self.batch_size] + self.image_shape) h4 = self.generate(Z, Y) #image_gen comes from sigmoid output of generator image_gen = tf.nn.sigmoid(h4) raw_real2 = self.discriminate(image_real, Y) #p_real = tf.nn.sigmoid(raw_real) p_real = tf.reduce_mean(raw_real2) raw_gen2 = self.discriminate(image_gen, Y) #p_gen = tf.nn.sigmoid(raw_gen) p_gen = tf.reduce_mean(raw_gen2) discrim_cost = tf.reduce_mean(raw_real2) - tf.reduce_mean(raw_gen2) gen_cost = -tf.reduce_mean(raw_gen2) mask = tf.placeholder(tf.float32, [self.batch_size] + self.image_shape, name='mask') '''contextual_loss_latter = tf.reduce_sum(tf.contrib.layers.flatten( -tf.log(tf.abs(image_real-image_gen))), 1)''' #contextual_loss_latter = tf.reduce_sum(tf.log(tf.contrib.layers.flatten(tf.abs(image_gen - pred_high))), 1) #log loss '''contextual_loss_latter = tf.reduce_sum(tf.contrib.layers.flatten( -tf.log(tf.maximum( (mask + tf.multiply(tf.ones_like(mask) - mask, pred_high)) - tf.multiply( tf.ones_like(mask) - mask, image_gen), 0.0001*tf.ones_like(mask))) -tf.log(tf.maximum( (mask + tf.multiply(tf.ones_like(mask) - mask, image_gen)) - tf.multiply( tf.ones_like(mask) - mask, pred_low), 0.0001*tf.ones_like(mask)))), 1)''' contextual_loss_latter = tf.contrib.layers.flatten(-tf.log( (mask + tf.multiply(tf.ones_like(mask) - mask, pred_high)) - tf.multiply(tf.ones_like(mask) - mask, image_gen)) - tf.log( (mask + tf.multiply(tf.ones_like(mask) - mask, image_gen)) - tf.multiply(tf.ones_like(mask) - mask, pred_low))) contextual_loss_latter = tf.where( tf.is_nan(contextual_loss_latter), tf.ones_like(contextual_loss_latter) * 1000000.0, contextual_loss_latter) contextual_loss_latter2 = tf.reduce_sum(contextual_loss_latter, 1) #square loss '''contextual_loss_latter = tf.reduce_sum(tf.contrib.layers.flatten( tf.square(tf.multiply(tf.ones_like(mask) - mask, image_gen) - tf.multiply(tf.ones_like(mask) - mask, pred_high))) +tf.contrib.layers.flatten( tf.square( tf.multiply(tf.ones_like(mask) - mask, image_gen) - tf.multiply(tf.ones_like(mask) - mask, pred_high))) , 1)''' contextual_loss_former = tf.reduce_sum( tf.contrib.layers.flatten( tf.square( tf.multiply(mask, image_gen) - tf.multiply(mask, image_real))), 1) contextual_loss_prepare = tf.reduce_sum( tf.contrib.layers.flatten( tf.square( tf.multiply(tf.ones_like(mask) - mask, image_gen) - tf.multiply(tf.ones_like(mask) - mask, image_real))), 1) perceptual_loss = gen_cost complete_loss = contextual_loss_former + self.lam * perceptual_loss + 0.05 * contextual_loss_latter2 grad_complete_loss = tf.gradients(complete_loss, Z) grad_uniform_loss = tf.gradients(contextual_loss_prepare, Z) return Z, Y, image_real, discrim_cost, gen_cost, p_real, p_gen, grad_complete_loss, \ pred_high, pred_low, mask, contextual_loss_latter, contextual_loss_former, grad_uniform_loss
def build_heatmap(in_tensor, config, names = None): num_detections = config.DETECTION_MAX_INSTANCES img_h, img_w = config.IMAGE_SHAPE[:2] batch_size = config.BATCH_SIZE num_classes = config.NUM_CLASSES print('\n ') print(' > NEW build_heatmap() for ', names ) print(' orignal in_tensor shape : ', in_tensor.shape) # rois per image is determined by size of input tensor # detection mode: config.TRAIN_ROIS_PER_IMAGE # ground_truth : config.DETECTION_MAX_INSTANCES rois_per_image = (in_tensor.shape)[2] # strt_cls = 0 if rois_per_image == 32 else 1 print(' num of bboxes per class is : ', rois_per_image ) #----------------------------------------------------------------------------- ## Stack non_zero bboxes from in_tensor into pt2_dense #----------------------------------------------------------------------------- # pt2_ind shape is [?, 3]. # pt2_ind[0] corresponds to image_index # pt2_ind[1] corresponds to class_index # pt2_ind[2] corresponds to roi row_index # pt2_dense shape is [?, 6] # pt2_dense[0] is image index # pt2_dense[1:4] roi cooridnaytes # pt2_dense[5] is class id #----------------------------------------------------------------------------- pt2_sum = tf.reduce_sum(tf.abs(in_tensor[:,:,:,:-2]), axis=-1) print(' pt2_sum shape ',pt2_sum.shape) # print(pt2_sum[0].eval()) pt2_ind = tf.where(pt2_sum > 0) ## replaced the two operations below with the one above - 15-05-2018 # pt2_mask = tf.greater(pt2_sum , 0) # pt2_ind = tf.where(pt2_mask) # print(' pt2_mask shape ', pt2_mask.get_shape()) # print(pt2_mask.eval()) # print(' pt2_ind shape ', pt2_ind.get_shape()) # print(pt2_ind.eval()) pt2_dense = tf.gather_nd( in_tensor, pt2_ind) print(' dense shape ',pt2_dense.get_shape()) #----------------------------------------------------------------------------- ## Build mesh-grid to hold pixel coordinates #----------------------------------------------------------------------------- X = tf.range(img_w, dtype=tf.int32) Y = tf.range(img_h, dtype=tf.int32) X, Y = tf.meshgrid(X, Y) # duplicate (repeat) X and Y into a batch_size x rois_per_image tensor print(' X/Y shapes :', X.get_shape(), Y.get_shape()) ones = tf.ones([tf.shape(pt2_dense)[0] , 1, 1], dtype = tf.int32) rep_X = ones * X rep_Y = ones * Y print(' Ones: ', ones.shape) print(' ones_exp * X', ones.shape, '*', X.shape, '= ',rep_X.shape) print(' ones_exp * Y', ones.shape, '*', Y.shape, '= ',rep_Y.shape) # # stack the X and Y grids bef_pos = tf.to_float(tf.stack([rep_X,rep_Y], axis = -1)) print(' before transpse ', bef_pos.get_shape()) pos_grid = tf.transpose(bef_pos,[1,2,0,3]) print(' after transpose ', pos_grid.get_shape()) #----------------------------------------------------------------------------- ## Build mean and convariance tensors for Multivariate Normal Distribution #----------------------------------------------------------------------------- width = pt2_dense[:,3] - pt2_dense[:,1] # x2 - x1 height = pt2_dense[:,2] - pt2_dense[:,0] cx = pt2_dense[:,1] + ( width / 2.0) cy = pt2_dense[:,0] + ( height / 2.0) means = tf.stack((cx,cy),axis = -1) covar = tf.stack((width * 0.5 , height * 0.5), axis = -1) covar = tf.sqrt(covar) tfd = tf.contrib.distributions mvn = tfd.MultivariateNormalDiag( loc = means, scale_diag = covar) prob_grid = mvn.prob(pos_grid) print(' Prob_grid shape before tanspose: ',prob_grid.get_shape()) prob_grid = tf.transpose(prob_grid,[2,0,1]) print(' Prob_grid shape after tanspose: ',prob_grid.get_shape()) print(' >> input to MVN.PROB: pos_grid (meshgrid) shape: ', pos_grid.get_shape()) print(' << output probabilities shape:' 
, prob_grid.get_shape()) #-------------------------------------------------------------------------------- ## IMPORTANT: kill distributions of NaN boxes (resulting from bboxes with height/width of zero ## which cause singular sigma cov matrices #-------------------------------------------------------------------------------- prob_grid = tf.where(tf.is_nan(prob_grid), tf.zeros_like(prob_grid), prob_grid) # scatter out the probability distributions based on class -------------------------- print('\n Scatter out the probability distributions based on class --------------') gauss_scatt = tf.scatter_nd(pt2_ind, prob_grid, [batch_size, num_classes, rois_per_image, img_w, img_h]) print(' pt2_ind shape : ', pt2_ind.shape) print(' prob_grid shape : ', prob_grid.shape) print(' gauss_scatt : ', gauss_scatt.shape) # batch_sz , num_classes, num_rois, image_h, image_w # heatmap: sum gauss_scattered based on class --------------------------------------- print('\n Reduce sum based on class ---------------------------------------------') gauss_sum = tf.reduce_sum(gauss_scatt, axis=2, name='pred_heatmap2') gauss_sum = tf.where(gauss_sum > 1e-12, gauss_sum, tf.zeros_like(gauss_sum)) print(' gaussian_sum shape : ', gauss_sum.get_shape(), 'Keras tensor ', KB.is_keras_tensor(gauss_sum) ) ##--------------------------------------------------------------------------------------------- ## heatmap L2 normalization ## Normalization using the `gauss_sum` (batchsize , num_classes, height, width) ## 17-05-2018 (New method, replace dthe previous method that usedthe transposed gauss sum ## 17-05-2018 Replaced with normalization across the CLASS axis ##--------------------------------------------------------------------------------------------- # print('\n L2 normalization ------------------------------------------------------') gauss_L2norm = KB.l2_normalize(gauss_sum, axis = +1) # normalize along the CLASS axis print(' gauss L2 norm : ', gauss_L2norm.shape ,' Keras tensor ', KB.is_keras_tensor(gauss_L2norm) ) print('\n normalization ------------------------------------------------------') gauss_norm = gauss_sum / tf.reduce_max(gauss_sum, axis=[-2,-1], keepdims = True) gauss_norm = tf.where(tf.is_nan(gauss_norm), tf.zeros_like(gauss_norm), gauss_norm) print(' gauss norm : ', gauss_norm.shape ,' Keras tensor ', KB.is_keras_tensor(gauss_norm) ) ##-------------------------------------------------------------------------------------------- ## generate score based on gaussian using bounding box masks ## NOTE: Score is generated on NORMALIZED gaussian distributions (GAUSS_NORM) ## If want to do this on NON-NORMALIZED, we need to apply it on GAUSS_SUM ##-------------------------------------------------------------------------------------------- # flatten guassian scattered and input_tensor, and pass on to build_bbox_score routine in_shape = tf.shape(in_tensor) in_tensor_flattened = tf.reshape(in_tensor, [-1, in_shape[-1]]) bboxes = tf.to_int32(tf.round(in_tensor_flattened[...,0:4])) print(' in_tensor ', in_tensor.shape) print(' in_tensorr_flattened is ', in_tensor_flattened.shape) print(' boxes shape ', bboxes.shape) print(' Rois per image : ', rois_per_image) #-------------------------------------------------------------------------------------------------------------------------- # duplicate GAUSS_NORM <num_roi> times to pass along with bboxes to map_fn function # Here we have a choice to calculate scores using the GAUSS_SUM (unnormalized) or GAUSS_NORM (normalized) # after looking at the scores and ratios for each option, I 
decided to go with the normalized # as the numbers are larger # # Examples> # Using GAUSS_SUM # [ 3.660313 3.513489 54.475536 52.747402 1. 0.999997 4.998889 2450. 0.00204 0.444867] # [ 7.135149 1.310972 50.020126 44.779854 1. 0.999991 4.981591 1892. 0.002633 0.574077] # [ 13.401865 0. 62.258957 46.636948 1. 0.999971 4.957398 2303. 0.002153 0.469335] # [ 0. 0. 66.42349 56.123024 1. 0.999908 4.999996 3696. 0.001353 0.294958] # [ 0. 0. 40.78952 60.404335 1. 0.999833 4.586552 2460. 0.001864 0.406513] # # Using GAUSS_NORM: # [ 3.660313 3.513489 54.475536 52.747402 1. 0.999997 1832.9218 2450. 0.748131 0.479411] # [ 7.135149 1.310972 50.020126 44.779854 1. 0.999991 1659.3965 1892. 0.877059 0.56203 ] # [ 13.401865 0. 62.258957 46.636948 1. 0.999971 1540.4974 2303. 0.668909 0.428645] # [ 0. 0. 66.42349 56.123024 1. 0.999908 1925.3267 3696. 0.520922 0.333813] # [ 0. 0. 40.78952 60.404335 1. 0.999833 1531.321 2460. 0.622488 0.398898] # # to change the source, change the following line gauss_norm <--> gauss_sum #--------------------------------------------------------------------------------------------------------------------------- temp = tf.expand_dims(gauss_norm, axis =2) temp = tf.tile(temp, [1,1, rois_per_image ,1,1]) temp_shape = KB.int_shape(temp) temp_reshape = KB.reshape(temp, (-1, temp_shape[-2], temp_shape[-1])) print(' heatmap original shape : ', gauss_norm.shape) print(' heatmap replicated : ', temp_shape) print(' heatmap flattened : ', temp_reshape.shape) scores = tf.map_fn(build_mask_routine, [temp_reshape, bboxes], dtype=tf.float32) # consider the two new columns for reshaping the gaussian_bbox_scores new_shape = tf.shape(in_tensor)+ [0,0,0, tf.shape(scores)[-1]] bbox_scores = tf.concat([in_tensor_flattened, scores], axis = -1) bbox_scores = tf.reshape(bbox_scores, new_shape) # print(' new shape is : ', new_shape.eval()) print(' in_tensor_flattened : ', in_tensor_flattened.shape) print(' Scores shape : ', scores.shape) # [(num_batches x num_class x num_rois ), 3] print(' boxes_scores (rehspaed) : ', bbox_scores.shape) ##-------------------------------------------------------------------------------------------- ## Normalize computed score above, and add it to the heatmap_score tensor as last column ##-------------------------------------------------------------------------------------------- scr_L2norm = tf.nn.l2_normalize(bbox_scores[...,-1], axis = -1) # shape (num_imgs, num_class, num_rois) scr_L2norm = tf.expand_dims(scr_L2norm, axis = -1) ##-------------------------------------------------------------------------------------------- # shape of tf.reduce_max(bbox_scores[...,-1], axis = -1, keepdims=True) is (num_imgs, num_class, 1) # This is a regular normalization that moves everything between [0, 1]. # This causes negative values to move to -inf, which is a problem in FCN scoring. # To address this a normalization between [-1 and +1] was introduced in FCN. # Not sure how this will work with training tho. ##-------------------------------------------------------------------------------------------- scr_norm = bbox_scores[...,-1]/ tf.reduce_max(bbox_scores[...,-1], axis = -1, keepdims=True) scr_norm = tf.where(tf.is_nan(scr_norm), tf.zeros_like(scr_norm), scr_norm) #-------------------------------------------------------------------------------------------- # this normalization moves values to [-1, +1] which we use in FCN, but not here. 
#-------------------------------------------------------------------------------------------- # reduce_max = tf.reduce_max(bbox_scores[...,-1], axis = -1, keepdims=True) # reduce_min = tf.reduce_min(bbox_scores[...,-1], axis = -1, keepdims=True) ## epsilon = tf.ones_like(reduce_max) * 1e-7 # scr_norm = (2* (bbox_scores[...,-1] - reduce_min) / (reduce_max - reduce_min)) - 1 scr_norm = tf.where(tf.is_nan(scr_norm), tf.zeros_like(scr_norm), scr_norm) scr_norm = tf.expand_dims(scr_norm, axis = -1) # shape (num_imgs, num_class, 32, 1) bbox_scores = tf.concat([bbox_scores, scr_norm, scr_L2norm], axis = -1) gauss_heatmap = KB.identity(tf.transpose(gauss_sum,[0,2,3,1]), name = names[0]) gauss_heatmap_norm = KB.identity(tf.transpose(gauss_norm,[0,2,3,1]), name = names[0]+'_norm') gauss_heatmap_L2norm = KB.identity(tf.transpose(gauss_L2norm,[0,2,3,1]), name = names[0]+'_L2norm') gauss_scores = KB.identity(bbox_scores, name = names[0]+'_scores') print(' gauss_heatmap final shape : ', gauss_heatmap.shape ,' Keras tensor ', KB.is_keras_tensor(gauss_heatmap) ) print(' gauss_scores final shape : ', gauss_scores.shape ,' Keras tensor ', KB.is_keras_tensor(gauss_scores) ) print(' complete') return gauss_heatmap_norm, gauss_scores, gauss_heatmap,gauss_heatmap_L2norm # [gauss_sum, gauss_scatt, means, covar]
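# Tiny sketch (TensorFlow 1.x) of the gauss_norm / scr_norm NaN guards used in
# build_heatmap above: when a class has an all-zero heatmap, dividing by its per-class
# maximum gives 0/0 = NaN, which tf.where then maps back to zero.
heat = tf.constant([[[0.0, 0.0], [0.0, 0.0]],    # class with no detections
                    [[0.2, 0.4], [0.8, 0.0]]])   # populated class
norm = heat / tf.reduce_max(heat, axis=[-2, -1], keepdims=True)
norm = tf.where(tf.is_nan(norm), tf.zeros_like(norm), norm)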
def pirank_deep_loss(labels, logits, features, tau, taustar, ndcg_k, top_k, list_size, merge_block_size, ste, straight_backprop, full_loss, tau_scheme, seed, depth=1): if not top_k: top_k = ndcg_k # Just here we want them the same assert top_k == ndcg_k assert merge_block_size >= top_k assert list_size % merge_block_size == 0 assert list_size >= merge_block_size assert merge_block_size % top_k == 0 div = merge_block_size // top_k with tf.name_scope("merge_nr_scope_depth_{}".format(depth)): is_label_valid = tfr.utils.is_label_valid(labels) labels = tf.where(is_label_valid, labels, tf.zeros_like(labels), name='labels') logits = tf.where( is_label_valid, logits, -1e-6 * tf.ones_like(logits) + tf.reduce_min(input_tensor=logits, axis=1, keepdims=True), name='logits') if list_size > merge_block_size: # Merge top_k logits from previous layers of merge logits, phat_tops = map( list( zip(*[ pirank_deep_loss(labels[:, i:(i + list_size // div)], logits[:, i:(i + list_size // div)], None, tau, taustar, ndcg_k, top_k, list_size // div, merge_block_size, ste, full_loss, depth=depth + 1) for i in range(0, list_size, list_size // div) ])).__getitem__, [0, 1]) logits = tf.concat(list(logits), 1, name='merged_logits') phat_tops = list(phat_tops) if not full_loss: labels = tf.concat(phat_tops, 1, name='merged_labels') # Get P_hat logits = tf.expand_dims(logits, 2, name="logits_exp") labels = tf.expand_dims(labels, 2, name="labels_exp") tau = tau**depth if tau_scheme == 'square' else tau if ste or depth == 1: P_hat_backward = util.neuralsort(logits, tau) P_hat_backward = tf.identity(P_hat_backward, name="P_hat_backward") P_hat_forward = util.neuralsort(logits, taustar) P_hat = P_hat_backward + tf.stop_gradient(P_hat_forward - P_hat_backward) else: P_hat = util.neuralsort(logits, tau) P_hat = tf.identity(P_hat, name="P_hat") phat_top = P_hat[:, :top_k, :] if full_loss and list_size > merge_block_size: # Do the recursive product if phat_tops is not None: res = [] for i, pt in enumerate(phat_tops): l, h = i * top_k, (i + 1) * top_k res.append(tf.matmul(phat_top[:, :, l:h], pt)) phat_top = tf.concat(res, 2) assert phat_top.shape[2] == list_size phat_top = tf.identity(phat_top, name='phat_top') if depth == 1: # Any deeper layers than last, return top_k label_powers = tf.pow(2.0, tf.cast(labels, dtype=tf.float32), name="label_powers") - 1.0 sorted_powers = tf.matmul(phat_top, label_powers, name='sorted_powers') numerator = tf.reduce_sum(sorted_powers, axis=-1, name="dcg_numerator") position = tf.cast(tf.range(1, ndcg_k + 1), dtype=tf.float32, name="dcg_position") denominator = tf.math.log(position + 1, name="dcg_denominator") dcg = numerator / (1e-10 + denominator) dcg = tf.reduce_sum(input_tensor=dcg, axis=1, keepdims=True, name="dcg") labels = tf.squeeze(labels, 2) ideal_sorted_labels, _ = tf.nn.top_k(labels, k=ndcg_k, sorted=True) numerator = tf.pow(2.0, tf.cast(ideal_sorted_labels, dtype=tf.float32), name="ideal_dcg_numerator") - 1.0 ideal_dcg = numerator / (1e-10 + denominator) ideal_dcg = tf.reduce_sum(ideal_dcg, axis=1, keepdims=True, name="ideal_dcg") dcg = tf.where(tf.is_nan(dcg), tf.zeros_like(dcg), dcg) ideal_dcg = tf.where(tf.is_nan(ideal_dcg), tf.ones_like(ideal_dcg), ideal_dcg) ndcg = tf.reduce_sum(dcg) / (1e-10 + tf.reduce_sum(ideal_dcg)) ndcg = tf.identity(ndcg, name='ndcg') return 1. 
- ndcg else: topk_logits = tf.matmul(phat_top, logits) if straight_backprop: topk_logits = tf.stop_gradient(topk_logits) topk_logits = tf.squeeze(topk_logits, 2, name="topk_logits") if not full_loss: topk_labels = tf.matmul(phat_top, labels) topk_labels = tf.squeeze(topk_labels, 2, name="topk_labels") return topk_logits, phat_top if full_loss else topk_labels
def lrp_lstm(r_out, o, c, i, z, w_o, w_i, act_h, multiplication_rule, alpha=1, beta=0, o_min=None, i_min=None, c_min=None, z_min=None, o_max=None, i_max=None, c_max=None, z_max=None): """lrp applied to TeLL LSTMLayer Parameters ------- r_out : tensor (batchsize, timesteps, units) o, c, i, z : tensor (batchsize, timesteps, units) w_o, w_i : tensor (incoming, outgoing) act_h activation function after cell multiplication_rule : int 0...50/50 rule; 1...proportional rule; 3...no multiplication rule, no relevance through recurrent gate connections """ n_timesteps = tf.shape(r_out)[1] if multiplication_rule == 0: mul_rule = additive_multiplication_rule elif multiplication_rule == 1: mul_rule = proportional_multiplication_rule elif multiplication_rule == 3: mul_rule = None else: raise AttributeError("Only multiplication_rule 0 and 1 are implemented") if beta == 0: o_min = tf.reduce_min(o, axis=1) i_min = tf.reduce_min(i, axis=1) c_min = tf.reduce_min(c, axis=1) z_min = tf.reduce_min(z, axis=1) o_max = tf.reduce_max(o, axis=1) i_max = tf.reduce_max(i, axis=1) c_max = tf.reduce_max(c, axis=1) z_max = tf.reduce_max(z, axis=1) else: o_min = tf.reduce_mean(o, axis=1) i_min = tf.reduce_mean(i, axis=1) c_min = tf.reduce_mean(c, axis=1) z_min = tf.reduce_mean(z, axis=1) o_max = 1 i_max = 1 c_max = 1 z_max = 1 # Create an set initializations for dict with LRP variables lrp_keys = ['r_from_o', 'r_from_i', 'r_cc', 'r_y', 'r_cy', 'r_o', 'r_c', 'r_zi', 'r_z', 'r_i'] zero = tf.constant(0, dtype=tf.int32) zero_init = tf.zeros_like(r_out[:, 0:1, :], tf.float32) lrp_dict = OrderedDict([(k, zero_init) for k in lrp_keys]) with tf.name_scope("LRPRNNLoop"): # Create initial tensors init_tensors = OrderedDict([('t', n_timesteps-1)]) init_tensors.update(lrp_dict) # Get initial tensor shapes in tf format init_shapes = OrderedDict([('t', init_tensors['t'].get_shape())]) lrp_shapes = OrderedDict((k, tf.TensorShape(lrp_dict[k].get_shape().as_list()[:1] + [None] + lrp_dict[k].get_shape().as_list()[2:])) for k in lrp_dict.keys()) init_shapes.update(lrp_shapes) def cond(t, *args): return tf.greater(t, zero) def body(t, r_from_o, r_from_i, r_cc, r_y, r_cy, r_o, r_c, r_zi, r_z, r_i): # # for time t # if mul_rule is None: r_y = tf.concat([r_y, tf.expand_dims(r_out[:, t, :], axis=1)], axis=1) r_cy = tf.concat([r_cy, tf.expand_dims(r_y[:, -1, :], axis=1)], axis=1) r_o = tf.concat([r_o, zero_init], axis=1) r_c = tf.concat([r_c, tf.expand_dims(r_cy[:, -1, :] + r_cc[:, -1, :], axis=1)], axis=1) r_zi_new = tf.expand_dims(r_c[:, -1, :] * (i[:, t, :] * z[:, t, :] / c[:, t, :]), axis=1) r_zi = tf.concat([r_zi, tf.where(tf.is_nan(r_zi_new), zero_init, r_zi_new)], axis=1) r_z = tf.concat([r_z, tf.expand_dims(r_zi[:, -1, :], axis=1)], axis=1) r_i = tf.concat([r_i, zero_init], axis=1) else: r_y = tf.concat([r_y, tf.expand_dims(r_out[:, t, :] + r_from_o[:, -1, :] + r_from_i[:, -1, :], axis=1)], axis=1) r_cy = tf.concat([r_cy, tf.expand_dims(mul_rule(act_h(c[:, t, :]), o[:, t, :], r_y[:, -1, :], c_min, c_max, o_min, o_max), axis=1)], axis=1) r_o = tf.concat([r_o, tf.expand_dims(mul_rule(o[:, t, :], act_h(c[:, t, :]), r_y[:, -1, :], o_min, o_max, c_min, c_max), axis=1)], axis=1) r_c = tf.concat([r_c, tf.expand_dims(r_cy[:, -1, :] + r_cc[:, -1, :], axis=1)], axis=1) r_zi_new = tf.expand_dims(r_c[:, -1, :] * (i[:, t, :] * z[:, t, :] / c[:, t, :]), axis=1) r_zi = tf.concat([r_zi, tf.where(tf.is_nan(r_zi_new), zero_init, r_zi_new)], axis=1) r_z = tf.concat([r_z, tf.expand_dims(mul_rule(z[:, t, :], i[:, t, :], r_zi[:, -1, :], z_min, z_max, i_min, 
i_max), axis=1)], axis=1) r_i = tf.concat([r_i, tf.expand_dims(mul_rule(i[:, t, :], z[:, t, :], r_zi[:, -1, :], i_min, i_max, z_min, z_max), axis=1)], axis=1) # # distribute R to units through recurrent connections # r_from_o_t = lrp(r=r_o[:, -1, :], w=w_o, x=o[:, t-1, :], x_min=o_min, alpha=alpha, beta=beta) r_from_o = tf.concat([r_from_o, tf.expand_dims(r_from_o_t, axis=1)], axis=1) r_from_i_t = lrp(r=r_i[:, -1, :], w=w_i, x=i[:, t-1, :], x_min=i_min, alpha=alpha, beta=beta) r_from_i = tf.concat([r_from_i, tf.expand_dims(r_from_i_t, axis=1)], axis=1) # # for time t-1 # r_cc_new = tf.expand_dims(c[:, t-1, :] / c[:, t, :] * r_c[:, -1, :], axis=1) r_cc = tf.concat([r_cc, tf.where(tf.is_nan(r_cc_new), zero_init, r_cc_new)], axis=1) t -= 1 return [t, r_from_o, r_from_i, r_cc, r_y, r_cy, r_o, r_c, r_zi, r_z, r_i] wl_ret = tf.while_loop(cond=cond, body=body, loop_vars=tuple(init_tensors.values()), shape_invariants=tuple(init_shapes.values()), parallel_iterations=10, back_prop=True, swap_memory=True) # Re-Associate returned tensors with keys r_collection = OrderedDict(zip(init_tensors.keys(), wl_ret)) _ = r_collection.pop('t') # Remove artificial timestep at end of sequences (sequences are in reversed temporal order) for k in r_collection.keys(): if k not in ['r_from_o', 'r_from_i', 'r_cc']: r_collection[k] = r_collection[k][:, 1:, :] # # for time t=0 # t = 0 if mul_rule is None: r_collection['r_y'] = tf.concat([r_collection['r_y'], tf.expand_dims(r_out[:, t, :], axis=1)], axis=1) r_collection['r_cy'] = tf.concat([r_collection['r_cy'], tf.expand_dims(r_collection['r_y'][:, -1, :], axis=1)], axis=1) r_collection['r_o'] = tf.concat([r_collection['r_o'], zero_init], axis=1) r_collection['r_c'] = tf.concat([r_collection['r_c'], tf.expand_dims(r_collection['r_cy'][:, -1, :] + r_collection['r_cc'][:, -1, :], axis=1)], axis=1) r_collection['r_zi_new'] = tf.expand_dims(r_collection['r_c'][:, -1, :] * (i[:, t, :] * z[:, t, :] / c[:, t, :]), axis=1) r_collection['r_zi'] = tf.concat([r_collection['r_zi'], tf.where(tf.is_nan(r_collection['r_zi_new']), zero_init, r_collection['r_zi_new'])], axis=1) r_collection['r_z'] = tf.concat([r_collection['r_z'], tf.expand_dims(r_collection['r_zi'][:, -1, :], axis=1)], axis=1) r_collection['r_i'] = tf.concat([r_collection['r_i'], zero_init], axis=1) else: r_collection['r_y'] = tf.concat([r_collection['r_y'], tf.expand_dims(r_out[:, t, :] + r_collection['r_from_o'][:, -1, :] + r_collection['r_from_i'][:, -1, :], axis=1)], axis=1) r_collection['r_cy'] = tf.concat([r_collection['r_cy'], tf.expand_dims(mul_rule(act_h(c[:, t, :]), o[:, t, :], r_collection['r_y'][:, -1, :], c_min, c_max, o_min, o_max), axis=1)], axis=1) r_collection['r_o'] = tf.concat([r_collection['r_o'], tf.expand_dims(mul_rule(o[:, t, :], act_h(c[:, t, :]), r_collection['r_y'][:, -1, :], o_min, o_max, c_min, c_max), axis=1)], axis=1) r_collection['r_c'] = tf.concat([r_collection['r_c'], tf.expand_dims(r_collection['r_cy'][:, -1, :] + r_collection['r_cc'][:, -1, :], axis=1)], axis=1) r_zi_new = tf.expand_dims(r_collection['r_c'][:, -1, :] * (i[:, t, :] * z[:, t, :] / c[:, t, :]), axis=1) r_collection['r_zi'] = tf.concat([r_collection['r_zi'], tf.where(tf.is_nan(r_zi_new), zero_init, r_zi_new)], axis=1) r_collection['r_z'] = tf.concat([r_collection['r_z'], tf.expand_dims(mul_rule(z[:, t, :], i[:, t, :], r_collection['r_zi'][:, -1, :], z_min, z_max, i_min, i_max), axis=1)], axis=1) r_collection['r_i'] = tf.concat([r_collection['r_i'], tf.expand_dims(mul_rule(i[:, t, :], z[:, t, :], r_collection['r_zi'][:, -1, 
:], i_min, i_max, z_min, z_max), axis=1)], axis=1) # # Initialize input gate and output gate relevance with 0 # r_from_o = [tf.zeros_like(r_out[:, 0, :], tf.float32)] # r_o redistributed to the individual units in t-1 # r_from_i = [tf.zeros_like(r_out[:, 0, :], tf.float32)] # r_i redistributed to the individual units in t-1 # r_cc = [tf.zeros_like(r_out[:, 0, :], tf.float32)] # r_ct<-ct+1 # # r_y = [] # r_cy = [] # r_ct<-yt # r_o = [] # r_c = [] # r_zi = [] # r_z = [] # r_i = [] # for t in rev_timesteps: # # # # for time t # # # if mul_rule is None: # r_y.append(r_out[:, t, :]) # r_cy.append(r_y[-1]) # r_o.append(tf.zeros_like(r_y[-1])) # # r_c.append(r_cy[-1] + r_cc[-1]) # # r_zi.append(r_c[-1] * (i[:, t, :] * z[:, t, :] / c[:, t, :])) # r_zi[-1] = tf.where(tf.is_nan(r_zi[-1]), tf.zeros_like(r_zi[-1]), r_zi[ # -1]) # TODO: This only holds for all-positive case! Otherwise we will need to consider r_zi[-2] to assign either full R or 0 # # r_z.append(r_zi[-1]) # r_i.append(tf.zeros_like(r_zi[-1])) # # else: # r_y.append(r_out[:, t, :] + r_from_o[-1] + r_from_i[-1]) # r_cy.append(mul_rule(act_h(c[:, t, :]), o[:, t, :], r_y[-1], c_min, c_max, o_min, o_max)) # r_o.append(mul_rule(o[:, t, :], act_h(c[:, t, :]), r_y[-1], o_min, o_max, c_min, c_max)) # # r_c.append(r_cy[-1] + r_cc[-1]) # # r_zi.append(r_c[-1] * (i[:, t, :] * z[:, t, :] / c[:, t, :])) # r_zi[-1] = tf.where(tf.is_nan(r_zi[-1]), tf.zeros_like(r_zi[-1]), r_zi[-1]) # TODO: This only holds for all-positive case! Otherwise we will need to consider r_zi[-2] to assign either full R or 0 # # r_z.append(mul_rule(z[:, t, :], i[:, t, :], r_zi[-1], z_min, z_max, i_min, i_max)) # r_i.append(mul_rule(i[:, t, :], z[:, t, :], r_zi[-1], i_min, i_max, z_min, z_max)) # # if t > 0: # # # # distribute R to units through recurrent connections # # # r_from_o_t = lrp(r=r_o[-1], w=w_o, x=o[:, t-1, :], x_min=o_min, alpha=alpha, beta=beta) # r_from_o.append(r_from_o_t) # # r_from_i_t = lrp(r=r_i[-1], w=w_i, x=i[:, t-1, :], x_min=i_min, alpha=alpha, beta=beta) # r_from_i.append(r_from_i_t) # # # # # for time t-1 # # # r_cc.append(c[:, t-1, :] / c[:, t, :] * r_c[-1]) # r_cc[-1] = tf.where(tf.is_nan(r_cc[-1]), tf.zeros_like(r_cc[-1]), r_cc[-1]) # TODO: This only holds for all-positive case! # # r_collection = dict(r_from_o=tf.stack(r_from_o, axis=1), r_from_i=tf.stack(r_from_i, axis=1), # r_cc=tf.stack(r_cc, axis=1), r_y=tf.stack(r_y, axis=1), r_cy=tf.stack(r_cy, axis=1), # r_o=tf.stack(r_o, axis=1), r_c=tf.stack(r_c, axis=1), r_zi=tf.stack(r_zi, axis=1), # r_z=tf.stack(r_z, axis=1), r_i=tf.stack(r_i, axis=1)) # Relevance is stored with reversed time dimension - correct it r_collection = OrderedDict((k, v[:, ::-1, :]) for k, v in r_collection.items()) return r_collection['r_z'], r_collection
def interp_regular_1d_grid(x, x_ref_min, x_ref_max, y_ref, axis=-1, fill_value='constant_extension', fill_value_below=None, fill_value_above=None, grid_regularizing_transform=None, name=None): """Linear `1-D` interpolation on a regular (constant spacing) grid. Given reference values, this function computes a piecewise linear interpolant and evaluates it on a new set of `x` values. The interpolant is built from `M` reference values indexed by one dimension of `y_ref` (specified by the `axis` kwarg). If `y_ref` is a vector, then each value `y_ref[i]` is considered to be equal to `f(x_ref[i])`, for `M` (implicitly defined) reference values between `x_ref_min` and `x_ref_max`: ```none x_ref[i] = x_ref_min + i * (x_ref_max - x_ref_min) / (M - 1), i = 0, ..., M - 1. ``` If `rank(y_ref) > 1`, then `y_ref` contains `M` reference values of a `rank(y_ref) - 1` rank tensor valued function of one variable. `x_ref` is a `Tensor` of values of that variable (any shape allowed). Args: x: Numeric `Tensor` The x-coordinates of the interpolated output values. x_ref_min: `Tensor` of same `dtype` as `x`. The minimum value of the (implicitly defined) reference `x_ref`. x_ref_max: `Tensor` of same `dtype` as `x`. The maximum value of the (implicitly defined) reference `x_ref`. y_ref: `N-D` `Tensor` (`N > 0`) of same `dtype` as `x`. The reference output values. axis: Scalar `Tensor` designating the dimension of `y_ref` that indexes values of the interpolation variable. Default value: `-1`, the rightmost axis. fill_value: Determines what values output should take for `x` values that are below `x_ref_min` or above `x_ref_max`. `Tensor` or one of the strings "constant_extension" ==> Extend as constant function. "extrapolate" ==> Extrapolate in a linear fashion. Default value: `"constant_extension"` fill_value_below: Optional override of `fill_value` for `x < x_ref_min`. fill_value_above: Optional override of `fill_value` for `x > x_ref_max`. grid_regularizing_transform: Optional transformation `g` which regularizes the implied spacing of the x reference points. In other words, if provided, we assume `g(x_ref_i)` is a regular grid between `g(x_ref_min)` and `g(x_ref_max)`. name: A name to prepend to created ops. Default value: `"interp_regular_1d_grid"`. Returns: y_interp: Interpolation between members of `y_ref`, at points `x`. `Tensor` of same `dtype` as `x`, and shape `y.shape[:axis] + x.shape + y.shape[axis + 1:]` Raises: ValueError: If `fill_value` is not an allowed string. ValueError: If `axis` is not a scalar. #### Examples Interpolate a function of one variable: ```python y_ref = tf.exp(tf.linspace(start=0., stop=10., 20)) interp_regular_1d_grid( x=[6.0, 0.5, 3.3], x_ref_min=0., x_ref_max=1., y_ref=y_ref) ==> approx [exp(6.0), exp(0.5), exp(3.3)] ``` Interpolate a matrix-valued function of one variable: ```python mat_0 = [[1., 0.], [0., 1.]] mat_1 = [[0., -1], [1, 0]] y_ref = [mat_0, mat_1] # Get three output matrices at once. 
tfp.math.interp_regular_1d_grid( x=[0., 0.5, 1.], x_ref_min=0., x_ref_max=1., y_ref=y_ref, axis=0) ==> [mat_0, 0.5 * mat_0 + 0.5 * mat_1, mat_1] ``` Interpolate a function of one variable on a log-spaced grid: ```python x_ref = tf.exp(tf.linspace(tf.log(1.), tf.log(100000.), num_pts)) y_ref = tf.log(x_ref + x_ref**2) interp_regular_1d_grid(x=[1.1, 2.2], x_ref_min=1., x_ref_max=100000., y_ref, grid_regularizing_transform=tf.log) ==> [tf.log(1.1 + 1.1**2), tf.log(2.2 + 2.2**2)] ``` """ with tf.name_scope( name, 'interp_regular_1d_grid', values=[ x, x_ref_min, x_ref_max, y_ref, axis, fill_value, fill_value_below, fill_value_above ]): # Arg checking. allowed_fv_st = ('constant_extension', 'extrapolate') for fv in (fill_value, fill_value_below, fill_value_above): if isinstance(fv, str) and fv not in allowed_fv_st: raise ValueError( 'A fill value ({}) was not an allowed string ({})'.format( fv, allowed_fv_st)) # Separate value fills for below/above incurs extra cost, so keep track of # whether this is needed. need_separate_fills = ( fill_value_above is not None or fill_value_below is not None or fill_value == 'extrapolate' # always requries separate below/above ) if need_separate_fills and fill_value_above is None: fill_value_above = fill_value if need_separate_fills and fill_value_below is None: fill_value_below = fill_value axis = tf.convert_to_tensor(axis, name='axis', dtype=tf.int32) _assert_ndims_statically(axis, expect_ndims=0) axis = distribution_util.make_non_negative_axis(axis, tf.rank(y_ref)) dtype = dtype_util.common_dtype([x, x_ref_min, x_ref_max, y_ref], preferred_dtype=tf.float32) x = tf.convert_to_tensor(x, name='x', dtype=dtype) x_ref_min = tf.convert_to_tensor(x_ref_min, name='x_ref_min', dtype=dtype) x_ref_max = tf.convert_to_tensor(x_ref_max, name='x_ref_max', dtype=dtype) y_ref = tf.convert_to_tensor(y_ref, name='y_ref', dtype=dtype) ny = tf.cast(tf.shape(y_ref)[axis], dtype) # Map [x_ref_min, x_ref_max] to [0, ny - 1]. # This is the (fractional) index of x. if grid_regularizing_transform is None: g = lambda x: x else: g = grid_regularizing_transform fractional_idx = ((g(x) - g(x_ref_min)) / (g(x_ref_max) - g(x_ref_min))) x_idx_unclipped = fractional_idx * (ny - 1) # Wherever x is NaN, x_idx_unclipped will be NaN as well. # Keep track of the nan indices here (so we can impute NaN later). # Also eliminate any NaN indices, since there is not NaN in 32bit. nan_idx = tf.is_nan(x_idx_unclipped) x_idx_unclipped = tf.where(nan_idx, tf.zeros_like(x_idx_unclipped), x_idx_unclipped) x_idx = tf.clip_by_value(x_idx_unclipped, tf.zeros((), dtype=dtype), ny - 1) # Get the index above and below x_idx. # Naively we could set idx_below = floor(x_idx), idx_above = ceil(x_idx), # however, this results in idx_below == idx_above whenever x is on a grid. # This in turn results in y_ref_below == y_ref_above, and then the gradient # at this point is zero. So here we "jitter" one of idx_below, idx_above, # so that they are at different values. This jittering does not affect the # interpolated value, but does make the gradient nonzero (unless of course # the y_ref values are the same). idx_below = tf.floor(x_idx) idx_above = tf.minimum(idx_below + 1, ny - 1) idx_below = tf.maximum(idx_above - 1, 0) # These are the values of y_ref corresponding to above/below indices. 
idx_below_int32 = tf.to_int32(idx_below) idx_above_int32 = tf.to_int32(idx_above) y_ref_below = tf.gather(y_ref, idx_below_int32, axis=axis) y_ref_above = tf.gather(y_ref, idx_above_int32, axis=axis) # out_shape = y_ref.shape[:axis] + x.shape + y_ref.shape[axis + 1:] out_shape = tf.shape(y_ref_below) # Return a convex combination. t = x_idx - idx_below t = _expand_ends(t, out_shape, axis) y = t * y_ref_above + (1 - t) * y_ref_below # Now begins a long excursion to fill values outside [x_min, x_max]. # Re-insert NaN wherever x was NaN. y = tf.where( _expand_ends(nan_idx, out_shape, axis, broadcast=True), tf.fill(tf.shape(y), tf.constant(np.nan, y.dtype)), y) x_idx_unclipped = _expand_ends( x_idx_unclipped, out_shape, axis, broadcast=True) if not need_separate_fills: if fill_value == 'constant_extension': pass # Already handled by clipping x_idx_unclipped. else: y = tf.where((x_idx_unclipped < 0) | (x_idx_unclipped > ny - 1), fill_value + tf.zeros_like(y), y) else: # Fill values below x_ref_min <==> x_idx_unclipped < 0. if fill_value_below == 'constant_extension': pass # Already handled by the clipping that created x_idx_unclipped. elif fill_value_below == 'extrapolate': y_0 = tf.gather(y_ref, tf.zeros(tf.shape(x), dtype=tf.int32), axis=axis) y_1 = tf.gather(y_ref, tf.ones(tf.shape(x), dtype=tf.int32), axis=axis) x_delta = (x_ref_max - x_ref_min) / (ny - 1) x_factor = (x - x_ref_min) / x_delta x_factor = _expand_ends(x_factor, out_shape, axis, broadcast=True) y = tf.where(x_idx_unclipped < 0, y_0 + x_factor * (y_1 - y_0), y) else: y = tf.where(x_idx_unclipped < 0, fill_value_below + tf.zeros_like(y), y) # Fill values above x_ref_min <==> x_idx_unclipped > ny - 1. if fill_value_above == 'constant_extension': pass # Already handled by the clipping that created x_idx_unclipped. elif fill_value_above == 'extrapolate': ny_int32 = tf.shape(y_ref)[axis] y_n1 = tf.gather(y_ref, tf.fill(tf.shape(x), ny_int32 - 1), axis=axis) y_n2 = tf.gather(y_ref, tf.fill(tf.shape(x), ny_int32 - 2), axis=axis) x_delta = (x_ref_max - x_ref_min) / (ny - 1) x_factor = (x - x_ref_max) / x_delta x_factor = _expand_ends(x_factor, out_shape, axis, broadcast=True) y = tf.where(x_idx_unclipped > ny - 1, y_n1 + x_factor * (y_n1 - y_n2), y) else: y = tf.where(x_idx_unclipped > ny - 1, fill_value_above + tf.zeros_like(y), y) return y
def lrp_lstm_c(r_out, o, c, i, z, w_o, w_i, act_h, n_timesteps, multiplication_rule): """lrp applied to TeLL LSTMLayer Parameters ------- r_out : tensor (batchsize, timesteps, units) o, c, i, z : tensor (batchsize, timesteps, units) w_o, w_i : tensor (incoming, outgoing) act_h activation function after cell multiplication_rule : int 0...50/50 rule; 1...proportional rule; 3...no multiplication rule, no relevance through recurrent gate connections """ if multiplication_rule == 0: mul_rule = additive_multiplication_rule elif multiplication_rule == 1: mul_rule = proportional_multiplication_rule elif multiplication_rule == 3: mul_rule = None else: raise AttributeError("Only multiplication_rule 0 and 1 are implemented") # Initialize input gate and output gate relevance with 0 r_from_o = [tf.zeros_like(r_out[:, 0, :], tf.float32)] # r_o redistributed to the individual units in t-1 r_from_i = [tf.zeros_like(r_out[:, 0, :], tf.float32)] # r_i redistributed to the individual units in t-1 r_cc = [tf.zeros_like(r_out[:, 0, :], tf.float32)] # r_ct<-ct+1 r_y = [] r_cy = [] # r_ct<-yt r_o = [] r_c = [] r_zi = [] r_z = [] r_i = [] ttt = [] rev_timesteps = np.arange(n_timesteps)[::-1] for t in rev_timesteps: # # for time t # ttt.append(r_out[:, t, :]) if mul_rule is None: r_y.append(r_out[:, t, :]) r_cy.append(r_y[-1]) r_o.append(tf.zeros_like(r_y[-1])) r_c.append(r_cy[-1] + r_cc[-1]) r_zi.append(r_c[-1] * (i[:, t, :] * z[:, t, :] / c[:, t, :])) r_zi[-1] = tf.where(tf.is_nan(r_zi[-1]), tf.zeros_like(r_zi[-1]), r_zi[ -1]) # TODO: This only holds for all-positive case! Otherwise we will need to consider r_zi[-2] to assign either full R or 0 r_z.append(r_zi[-1]) r_i.append(tf.zeros_like(r_zi[-1])) else: r_y.append(r_out[:, t, :] + r_from_o[-1] + r_from_i[-1]) r_cy.append(mul_rule(act_h(c[:, t, :]), o[:, t, :], r_y[-1], c_min, c_max, o_min, o_max)) r_o.append(mul_rule(o[:, t, :], act_h(c[:, t, :]), r_y[-1], o_min, o_max, c_min, c_max)) r_c.append(r_cy[-1] + r_cc[-1]) r_zi.append(r_c[-1] * (i[:, t, :] * z[:, t, :] / c[:, t, :])) r_zi[-1] = tf.where(tf.is_nan(r_zi[-1]), tf.zeros_like(r_zi[-1]), r_zi[ -1]) # TODO: This only holds for all-positive case! Otherwise we will need to consider r_zi[-2] to assign either full R or 0 r_z.append(mul_rule(z[:, t, :], i[:, t, :], r_zi[-1], z_min, z_max, i_min, i_max)) r_i.append(mul_rule(i[:, t, :], z[:, t, :], r_zi[-1], i_min, i_max, z_min, z_max)) if t > 0: # # distribute R to units through recurrent connections # r_from_o_t = lrp(r=r_o[-1], w=w_o, x=o[:, t - 1, :], x_min=o_min, alpha=alpha, beta=beta) r_from_o.append(r_from_o_t) r_from_i_t = lrp(r=r_i[-1], w=w_i, x=i[:, t - 1, :], x_min=i_min, alpha=alpha, beta=beta) r_from_i.append(r_from_i_t) # # for time t-1 # r_cc.append(c[:, t - 1, :] / c[:, t, :] * r_c[-1]) r_cc[-1] = tf.where(tf.is_nan(r_cc[-1]), tf.zeros_like(r_cc[-1]), r_cc[-1]) # TODO: This only holds for all-positive case! r_collection = dict(r_from_o=tf.stack(r_from_o, axis=1), r_from_i=tf.stack(r_from_i, axis=1), r_cc=tf.stack(r_cc, axis=1), r_y=tf.stack(r_y, axis=1), r_cy=tf.stack(r_cy, axis=1), r_o=tf.stack(r_o, axis=1), r_c=tf.stack(r_c, axis=1), r_zi=tf.stack(r_zi, axis=1), r_z=tf.stack(r_z, axis=1), r_i=tf.stack(r_i, axis=1), ttt=tf.stack(ttt, axis=1)) # Relevance is stored with reversed time dimension - correct it r_collection = dict((k, v[:, ::-1, :]) for k, v in r_collection.items()) return r_collection['r_z'], r_collection
def get_features_labels(self, sample_list, offset): '''get features and labels from the samples''' all_images = [] all_labels = [] for sample in sample_list: file_name = sample[0] for example in tf.python_io.tf_record_iterator(file_name): img_geom = self.iad_dimensions[str(self.layer)] features = dict() features['label'] = tf.FixedLenFeature((), tf.int64) features['img/{:02d}'.format(self.layer)] = tf.FixedLenFeature( (), tf.string) features['num_rows/{:02d}'.format( self.layer)] = tf.FixedLenFeature((), tf.int64) features['num_columns/{:02d}'.format( self.layer)] = tf.FixedLenFeature((), tf.int64) parsed_features = tf.parse_single_example(example, features) num_rows = parsed_features['num_rows/{:02d}'.format( self.layer)] num_columns = parsed_features['num_columns/{:02d}'.format( self.layer)] # decode the image, get label img = tf.decode_raw( parsed_features['img/{:02d}'.format(self.layer)], tf.float32) img = tf.where(tf.is_nan(img), tf.zeros_like(img), img) img = tf.clip_by_value(img, 0.0, 1.0) #img = tf.subtract(img, 0.5) img = tf.reshape(img, (num_rows, num_columns, 1), "parse_reshape_test") print("img shape = %s" % img.get_shape()) # random slice of the image #img = tf.random_crop(img, [img_geom[0], img_geom[1], 1]) #column_offsets = list(range(num_columns))[::img_geom[1]] column_offsets = tf.range(0, num_columns - img_geom[1], delta=img_geom[1]) # determine the offset for the IAD slice if offset == -1: # select a random IAD slice start_column = tf.cast( tf.random_shuffle(column_offsets)[0], dtype=tf.int32) new_offset = -1 elif offset == -2: start_column = 0 new_offset = -2 else: start_column = offset new_offset = offset + img_geom[1] if new_offset > img_geom[1]: new_offset = 0 # slice the image img = tf.slice(img, [0, start_column, 0], [img_geom[0], img_geom[1], img_geom[2]]) print("slice shape = %s" % img.get_shape()) # get a random slice of the image, use column offsets #column_offsets = list(range(num_columns))[::img_geom[1]] #start_column = random.choice(column_offsets) #img = tf.slice(img, [0, start_column, 0], [img_geom[0], img_geom[1], img_geom[2]]) #if NORMALIZE_IMAGE: # img = tf.image.per_image_standardization(img) label = tf.cast(parsed_features['label'], tf.int64) label = tf.one_hot(label, depth=self.num_classes, dtype=tf.int32) all_images.append(img) all_labels.append(label) # convert list to ndarray all_images = np.array(all_images) all_labels = np.array(all_labels) return all_images, all_labels, new_offset
def depthLoss(self, y_true, y_pred):
    # Zero the error wherever the target depth is missing (NaN), then take the RMSE
    diff = tf.where(tf.is_nan(y_true), tf.zeros_like(y_true), y_true - y_pred)
    mean = tf.sqrt(tf.reduce_mean(tf.square(diff)))
    return mean

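# Hypothetical usage sketch for a NaN-masked RMSE like depthLoss above: the
# standalone masked_rmse below mirrors it so it can be passed straight to Keras.
# Note that the zeroed-out NaN pixels still count in the denominator of the
# mean; build_depth_model is an assumed, illustrative constructor.
import tensorflow as tf

def masked_rmse(y_true, y_pred):
    diff = tf.where(tf.is_nan(y_true), tf.zeros_like(y_true), y_true - y_pred)
    return tf.sqrt(tf.reduce_mean(tf.square(diff)))

# model = build_depth_model()
# model.compile(optimizer='adam', loss=masked_rmse)
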
def replace_nan_values(gt):
    with tf.variable_scope('replace_nan'):
        gt = tf.where(tf.is_nan(gt), tf.zeros_like(gt), gt)
    return gt

def tf_get_layer_distance(self, r_0, r_1, v, d):
    """
    Calculates the travel distance in each layer for each photon.

    Parameters
    ----------
    r_0 : TF Tensor, shape(?, 3)
        Photon starting positions (scattering point).
    r_1 : TF Tensor, shape(?, 3)
        Photon end positions (next scattering point or hit).
    v : TF Tensor, shape(?, 3) or None
        Normalized direction vectors r_1 - r_0. Redundant, but since it is
        already calculated before it should be passed and not calculated again.
    d : TF Tensor, shape(?)
        The distance between r_1 and r_0. Also redundant but already known
        beforehand.

    Returns
    -------
    TF Tensor of shape(?, N_layers) where each entry is the traveled distance
    of the corresponding photon in the corresponding layer.
    """
    # grab z coordinates from start and end vectors, make sure z_0 < z_1
    z_0 = tf.where(r_0[:, 2] < r_1[:, 2], r_0[:, 2], r_1[:, 2])
    z_1 = tf.where(r_0[:, 2] > r_1[:, 2], r_0[:, 2], r_1[:, 2])

    # initialize the distance vector (traveled distance in each layer)
    d_z = tf.zeros([settings.BATCH_SIZE, self.N_layer],
                   dtype=settings.FLOAT_PRECISION)

    # expand and tile for where
    z_0 = tf.tile(tf.expand_dims(z_0, 1), [1, self.N_layer])
    z_1 = tf.tile(tf.expand_dims(z_1, 1), [1, self.N_layer])
    z_l = tf.tile(tf.expand_dims(self._z_l, 0), [settings.BATCH_SIZE, 1])
    z_h = tf.tile(tf.expand_dims(self._z_h, 0), [settings.BATCH_SIZE, 1])

    # completely traversed layers
    d_z += tf.where(tf.logical_and(z_l > z_0, z_h < z_1),
                    self.dz * tf.ones_like(d_z), tf.zeros_like(d_z))

    # starting layer
    d_z += tf.where(tf.logical_and(z_l < z_0, z_h > z_0), z_h - z_0,
                    tf.zeros_like(d_z))

    # last layer
    d_z += tf.where(tf.logical_and(z_l < z_1, z_h > z_1), z_1 - z_l,
                    tf.zeros_like(d_z))

    # rescale to real direction, since v is normalized the dot product and
    # therefore cos of the angle is simply the z component of v
    d_layer = d_z / tf.expand_dims(tf.abs(v[:, 2]), 1)

    # OR only in one layer
    d_layer = tf.where(
        tf.logical_and(tf.logical_and(z_l < z_0, z_h > z_0),
                       tf.logical_and(z_l < z_1, z_h > z_1)),
        tf.tile(tf.expand_dims(d, 1), [1, self.N_layer]),
        d_layer)

    # quick & dirty nan protection...
    d_layer = tf.where(tf.is_nan(d_layer), tf.zeros_like(d_layer), d_layer)
    return d_layer

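# A quick consistency check one might run against tf_get_layer_distance: when a
# photon path lies entirely inside the layered region, the per-layer distances
# should sum back to the total path length d. `ice_model`, `r_0` and `r_1` are
# placeholders for whatever objects exist in the surrounding code.
delta = r_1 - r_0
d = tf.sqrt(tf.reduce_sum(tf.square(delta), axis=1))
v = delta / tf.expand_dims(d, 1)
d_layer = ice_model.tf_get_layer_distance(r_0, r_1, v, d)
residual = tf.reduce_max(tf.abs(tf.reduce_sum(d_layer, axis=1) - d))
# residual should evaluate to ~0 (up to float precision) for contained paths
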
def train(sess, net, is_training, keep_prob, train_layers=None, fine_tune=None):
    if not os.path.exists(FLAGS.train_dir):
        os.makedirs(FLAGS.train_dir)

    coord = tf.train.Coordinator()
    reader = load_images(coord, FLAGS.data_dir)
    corpus_size = reader.corpus_size
    #import IPython; IPython.embed()
    if FLAGS.in_memory:
        X, Y_true = load_data_to_memory(FLAGS.data_dir, pattern='*.npy', limit=1000000, dshape=SP2_BOX)
        corpus_size = Y_true.size

    if fine_tune is not None:
        train_batch_pipe, label_pipe = reader.dequeue(FLAGS.batch_size/2)
        tX, tY_true = load_data_to_memory(FLAGS.tune_dir, pattern='*.npy', limit=100000, dshape=SP2_BOX)
        tune_size = tX.shape[0]
    else:
        train_batch_pipe, label_pipe = reader.dequeue(FLAGS.batch_size)

    train_batch = tf.placeholder(reader.tfdtype, name='train_placeholder',
                                 shape=[None, SP2_BOX[0], SP2_BOX[1], SP2_BOX[2]])
    labels = tf.placeholder(dtype=reader.label_type, shape=[None], name='label_placeholder')

    if False:
        train_batch = tf.clip_by_value(train_batch, -1, 1)
    if False:  #single image normalization
        mean, var = tf.nn.moments(train_batch**2, [1], keep_dims=True)
        train_batch /= tf.sqrt(mean)
    if False:
        mean, var = tf.nn.moments(input_placeholder, [1], keep_dims=True)  #single image normalization
        train_batch = tf.div(tf.subtract(input_placeholder, mean), tf.sqrt(var))

    train_batch = tf.where(tf.is_nan(train_batch), tf.zeros_like(train_batch), train_batch)
    train_batch = tf.nn.avg_pool(train_batch,
                                 ksize=[1, FLAGS.tpool_chan, FLAGS.pool_chan, 1],
                                 strides=[1, FLAGS.tpool_chan, FLAGS.pool_chan, 1],
                                 padding='SAME')
    if FLAGS.crop:
        train_batch = tf.image.crop_and_resize(train_batch, boxes=[SP2_BOX])
    if train_batch.dtype != tf.float32:
        train_batch = tf.cast(train_batch, tf.float32)
    if FLAGS.relu_input == 'relu':
        train_batch = tf.nn.relu(train_batch)
    elif FLAGS.relu_input == 'lrelu':
        train_batch = lrelu(train_batch, alpha=0.2)

    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    val_step = tf.get_variable('val_step', [], initializer=tf.constant_initializer(0), trainable=False)

    logits = net.inference(train_batch, name='logits')
    #import IPython; IPython.embed()
    loss_ = net.loss(logits, labels, name='weather_loss')
    predictions = tf.nn.softmax(logits, name='output')
    #import IPython; IPython.embed()
    top1_error = top_k_error(predictions, labels, 1)

    # loss_avg
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    tf.add_to_collection(UPDATE_OPS_COLLECTION, ema.apply([loss_]))
    tf.summary.scalar('loss_avg', ema.average(loss_))

    # validation stats
    ema = tf.train.ExponentialMovingAverage(0.99, val_step)
    val_op = tf.group(val_step.assign_add(1), ema.apply([top1_error]))
    top1_error_avg = ema.average(top1_error)
    tf.summary.scalar('val_top1_error_avg', top1_error_avg)

    learning_rate = tf.placeholder(tf.float32, [], name='learning_rate')
    tf.summary.scalar('learning_rate', learning_rate)

    opt = tf.train.MomentumOptimizer(learning_rate, MOMENTUM, use_nesterov=True)
    all_grads = opt.compute_gradients(loss_)
    if not FLAGS.resume or train_layers is None:
        grads = all_grads
    else:
        grads = []
        layer_names = ['fc']
        if len(train_layers) > 0:
            layer_names += ["scale{}".format(i) for i in train_layers]
        for grad, var in all_grads:
            if any([n in var.name for n in layer_names]):
                grads.append([grad, var])
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    for grad, var in grads:
        if "weight" in var.name and grad is not None and not FLAGS.minimal_summaries:
            dims = len(grad.get_shape())
            grad_per_feat = tf.reduce_mean(grad, reduction_indices=range(dims), name="avg_pool")
            tf.summary.histogram(var.op.name + '/gradients/', grad)
            tf.summary.histogram(var.op.name + '/gradients_per_feat/', grad_per_feat)

    if not FLAGS.minimal_summaries and False:
        # Display the training images in the visualizer.
        #tf.image_summary('images', images)
        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)

    batchnorm_updates = tf.get_collection(UPDATE_OPS_COLLECTION)
    batchnorm_updates_op = tf.group(*batchnorm_updates)
    train_op = tf.group(apply_gradient_op, batchnorm_updates_op)

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()
    init = tf.global_variables_initializer()

    nparams = 0
    for v in tf.global_variables():
        #sh = np.asarray(v.get_shape()).astype(np.float)
        if len(v.get_shape()) > 0:
            #print(v.name, int(np.prod(v.get_shape())))
            nparams += int(np.prod(v.get_shape()))
    print("Number of parameters in network", nparams)
    #import IPython; IPython.embed()

    sess.run(init)
    summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

    if FLAGS.resume:
        latest = tf.train.latest_checkpoint(FLAGS.train_dir)
        if not latest:
            print("No checkpoint to continue from in", FLAGS.train_dir)
            sys.exit(1)
        print("resume", latest)
        saver.restore(sess, latest)

    if not FLAGS.in_memory:
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        reader.start_threads(sess)

    try:
        for epoch in xrange(FLAGS.epoch):
            if FLAGS.in_memory:
                inds = np.arange(corpus_size)
                np.random.shuffle(inds)
                X, Y_true = X[inds], Y_true[inds]
            if epoch == 60:
                FLAGS.learning_rate /= 10.
            if FLAGS.num_per_epoch:
                batch_idx = min(FLAGS.num_per_epoch, corpus_size) // FLAGS.batch_size
            else:
                batch_idx = corpus_size // FLAGS.batch_size

            for idx in xrange(batch_idx):
                start_time = time.time()
                step = sess.run(global_step)
                i = [train_op, loss_]
                write_summary = step % 100 and step > 1
                if write_summary:
                    i.append(summary_op)

                if FLAGS.in_memory:
                    inds = np.random.choice(np.arange(corpus_size), size=FLAGS.batch_size)
                    batch, batch_labels = X[inds], Y_true[inds]
                else:
                    batch, batch_labels = sess.run([train_batch_pipe, label_pipe])
                if fine_tune is not None:
                    inds = np.random.choice(np.arange(tune_size), size=FLAGS.batch_size/2)
                    tbatch, tlabels = tX[inds], tY_true[inds]
                    batch = np.vstack([batch, tbatch])
                    batch_labels = np.concatenate([batch_labels, tlabels])
                #import IPython; IPython.embed()

                o = sess.run(i, {
                    train_batch: batch,
                    labels: batch_labels,
                    is_training: True,
                    keep_prob: 0.5,
                    learning_rate: FLAGS.learning_rate
                })
                #import IPython; IPython.embed()
                loss_value = o[1]
                duration = time.time() - start_time

                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                if step % 10 == 0:
                    examples_per_sec = FLAGS.batch_size / float(duration)
                    format_str = ('Epoch %d, [%d / %d], loss = %.2f (%.1f examples/sec; %.3f '
                                  'sec/batch)')
                    print(format_str % (epoch, idx, batch_idx, loss_value, examples_per_sec, duration))

                if write_summary:
                    summary_str = o[2]
                    summary_writer.add_summary(summary_str, step)

                # Save the model checkpoint periodically.
                if step > 1 and step % 500 == 0:
                    checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=global_step)

                # Run validation periodically
                if step % 100 == 0:
                    _, top1_error_value, y_true, y_pred = sess.run(
                        [val_op, top1_error, labels, predictions],
                        {train_batch: batch, labels: batch_labels, is_training: False, keep_prob: 1})
                    #pp, ll = sess.run([predictions, labels], {is_training:False})
                    #print('Predictions: ', pp)
                    #print('labels: ', ll)
                    y_pred = np.argmax(y_pred, axis=1)
                    print(sklearn.metrics.classification_report(y_true, y_pred))
                    print(sklearn.metrics.confusion_matrix(y_true, y_pred))
                    print("Classification accuracy: %0.6f" % sklearn.metrics.accuracy_score(y_true, y_pred))
                    print('weather top1 error {}'.format(top1_error_value))
    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        print('Finished, output see {}'.format(FLAGS.train_dir))
        if not FLAGS.in_memory:
            coord.request_stop()
            coord.join(threads)

def __init__(self, batch_size=32, length_data=3000, n_channel=3, is_training=True, model_name="wavenet"):
    n_dim = 128
    self.graph = tf.Graph()
    self.model_name = model_name
    with self.graph.as_default():
        self.is_training = is_training
        self.input_data = tf.placeholder(dtype=tf.float32, shape=[batch_size, length_data, n_channel])
        self.label_p = tf.placeholder(dtype=tf.int32, shape=[batch_size, length_data])
        self.label_s = tf.placeholder(dtype=tf.int32, shape=[batch_size, length_data])
        self.label_pt = tf.placeholder(dtype=tf.float32, shape=[batch_size, length_data, 1])
        self.label_st = tf.placeholder(dtype=tf.float32, shape=[batch_size, length_data, 1])
        self.weight_p = tf.placeholder(dtype=tf.float32, shape=[batch_size, length_data])
        self.weight_s = tf.placeholder(dtype=tf.float32, shape=[batch_size, length_data])
        self.weight_pt = tf.placeholder(dtype=tf.float32, shape=[batch_size, length_data])
        self.weight_st = tf.placeholder(dtype=tf.float32, shape=[batch_size, length_data])

        if model_name == "wavenet":
            net = model.wavenet(self.input_data, is_training=is_training)
        elif model_name == "unet":
            net = model.unet(self.input_data, is_training=is_training)
        elif model_name == "brnn":
            net = model.brnn(self.input_data, is_training=is_training)
        elif model_name == "inception":
            net = model.inception(self.input_data, is_training=is_training)
        else:
            raise ValueError("Model name error")

        with tf.variable_scope('logit_p'):
            self.logit_p = tf.layers.conv1d(net, 2, 3, activation=None, padding="same")
        with tf.variable_scope('logit_s'):
            self.logit_s = tf.layers.conv1d(net, 2, 3, activation=None, padding="same")
        with tf.variable_scope('time_p'):
            self.times_p = tf.layers.conv1d(net, 1, 3, activation=None, padding="same")
        with tf.variable_scope('time_s'):
            self.times_s = tf.layers.conv1d(net, 1, 3, activation=None, padding="same")

        loss_p = tf.contrib.seq2seq.sequence_loss(self.logit_p, self.label_p, self.weight_p)
        loss_s = tf.contrib.seq2seq.sequence_loss(self.logit_s, self.label_s, self.weight_s)
        loss_tp = tf.reduce_mean(
            tf.reduce_sum(tf.squeeze((self.label_pt - self.times_p)**2) * self.weight_pt, axis=1))
        loss_ts = tf.reduce_mean(
            tf.reduce_sum(tf.squeeze((self.label_st - self.times_s)**2) * self.weight_st, axis=1))

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        reg_loss = tf.losses.get_regularization_loss()
        with tf.control_dependencies(update_ops):
            self.loss = loss_p * 1 + loss_s * 1 + loss_tp * 1 + loss_ts * 1 + 1e-6 * reg_loss
            # optimizer
            optimizer = tf.train.AdamOptimizer()
            self.optimize = optimizer.minimize(self.loss)

        self.logit_loss = loss_p + loss_s
        self.times_loss = loss_tp + loss_ts
        self.nan = tf.is_nan(self.loss)
        self.inf = tf.is_inf(self.loss)

        self.all_var = tf.trainable_variables()
        self.init = tf.global_variables_initializer()
        self.saver = tf.train.Saver()
        for itr in self.all_var:
            print(itr.name, itr.get_shape())
        self.summary = tf.summary.FileWriter("logdir", graph=self.graph)

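# One possible way to use the `nan`/`inf` flags exposed by the model above to
# abort a diverging run. `Model` stands in for the class that owns this
# __init__, and `batches()` is an assumed generator yielding a complete
# feed_dict for its placeholders; both names are illustrative.
import tensorflow as tf

model = Model(batch_size=32, length_data=3000, n_channel=3, model_name="wavenet")
with tf.Session(graph=model.graph) as sess:
    sess.run(model.init)
    for step, feed_dict in enumerate(batches()):
        _, loss, is_nan, is_inf = sess.run(
            [model.optimize, model.loss, model.nan, model.inf], feed_dict=feed_dict)
        if is_nan or is_inf:
            # Stop (or skip the update) before the corrupted loss reaches the weights
            raise RuntimeError("Loss became NaN/Inf at step {}".format(step))
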
def nan_mask(gt):
    with tf.variable_scope('remove_nan'):
        nan_mask = tf.where(tf.is_nan(gt), tf.zeros_like(gt), tf.ones_like(gt))
    return nan_mask

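# A small sketch of how nan_mask and replace_nan_values (both defined above)
# could be combined into a NaN-aware regression loss; the function and variable
# names here are illustrative, not taken from the original code.
import tensorflow as tf

def masked_l1_loss(pred, gt):
    mask = nan_mask(gt)                 # 1 where gt is valid, 0 where it is NaN
    gt_clean = replace_nan_values(gt)   # NaNs replaced by 0 so the graph stays finite
    abs_err = tf.abs(pred - gt_clean) * mask
    # Average only over the valid entries (guard against an all-NaN target)
    return tf.reduce_sum(abs_err) / tf.maximum(tf.reduce_sum(mask), 1.0)
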
def replace_none(self, t):
    """
    This method replaces NaN with 0. This can be used for sampling.
    If sampling NaN, the viewer turns black and does not recover.
    """
    return tf.where(tf.is_nan(t), tf.zeros_like(t), t)

def predict(sess, net, is_training, keep_prob, prefix='test_', append=False, from_fil=True):
    if not os.path.exists(FLAGS.train_dir):
        os.makedirs(FLAGS.train_dir)

    coord = tf.train.Coordinator()
    if from_fil:
        reader = load_filterbank(coord, FLAGS.fbfilename)
        test_batch, img_id = reader.dequeue(FLAGS.batch_size)
        if True:
            #single image normalization
            mean, var = tf.nn.moments(test_batch, [1], keep_dims=True)
            test_batch = tf.div(tf.subtract(test_batch, mean), tf.sqrt(var))
            test_batch = tf.where(tf.is_nan(test_batch), tf.zeros_like(test_batch), test_batch)
        test_batch = tf.nn.avg_pool(test_batch,
                                    ksize=[1, FLAGS.tpool_chan, FLAGS.pool_chan, 1],
                                    strides=[1, FLAGS.tpool_chan, FLAGS.pool_chan, 1],
                                    padding='SAME')
        if FLAGS.crop:
            test_batch = tf.image.crop_and_resize(test_batch, boxes=[SP2_BOX])
    else:
        reader = load_images(coord, FLAGS.data_dir, train=False)
        test_batch, img_id, _ = reader.dequeue(FLAGS.batch_size)
        if False:
            mean, var = tf.nn.moments(test_batch**2, [1], keep_dims=True)
            test_batch /= tf.sqrt(mean)

    corpus_size = reader.corpus_size
    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

    logits = net.inference(test_batch)
    wpred = tf.nn.softmax(logits)

    init = tf.global_variables_initializer()
    sess.run(init)

    saver = tf.train.Saver(tf.global_variables())
    latest = tf.train.latest_checkpoint(FLAGS.train_dir)
    if not latest:
        print("No checkpoint to continue from in", FLAGS.train_dir)
        sys.exit(1)
    print("resume", latest)
    saver.restore(sess, latest)

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)
    #import IPython; IPython.embed()

    sample_cnt = 0
    add_cnt = FLAGS.batch_size
    OUTDIR = FLAGS.train_dir + 'pos_5/'
    OUTFILE = FLAGS.train_dir + 'pos_5.csv'
    if not append:
        outfile = open(OUTFILE, 'w')
        #outfile.write("image_name,tags\n")
    else:
        outfile = open(OUTFILE, 'a')
    detections = []
    persistent = False
    #OUTDIR = FLAGS.train_dir+'/frb20180301_3/'
    if not os.path.exists(OUTDIR):
        os.makedirs(OUTDIR)

    try:
        while True:
            print('from train', sample_cnt, corpus_size)
            if sample_cnt + FLAGS.batch_size*4 > corpus_size-1:
                break
            start_time = time.time()
            weather_scores, image_id, inputs = sess.run([wpred, img_id, test_batch],
                                                        {is_training: False, keep_prob: 1})
            string_list = get_predictions(weather_scores, batched=True)
            duration = time.time() - start_time
            #import IPython; IPython.embed()
            for n, label_str in enumerate(string_list):
                #print(prefix+str(image_id[n])+','+label_str)
                if n + sample_cnt >= corpus_size:
                    add_cnt = n
                    break
                t_ind = int(image_id[n].split('_')[-1])
                # if weather_scores[n,1]> 0.5:
                fname = prefix + str(image_id[n])  #+','+label_str+'\n'
                if not persistent:
                    detections.append([t_ind, weather_scores[n][1]])
                    _save_pos(inputs[n], OUTDIR+fname, tstart=t_ind*0.0003495253)
                    #np.save(OUTDIR+fname, inputs[n])
                    print(t_ind*0.0003495253333333333, weather_scores[n][1])
                    outfile.write(','.join([str(t_ind*0.0003495253), str(weather_scores[n][1]), fname, '\n']))
                    #import IPython; IPython.embed()
                    persistent = True
                else:
                    persistent = False
                #outfile.write(prefix+str(image_id[n])+','+label_str+'\n')
            sample_cnt += add_cnt
            if sample_cnt % 20 == 0:
                perc = (FLAGS.batch_size/float(corpus_size))/(duration/(30.*60))
                qsize = sess.run(reader.queue.size())
                print("{}/{}, {} sec/batch, {} real time, queue size: {}".format(
                    sample_cnt, corpus_size, duration, perc, qsize))
    except ValueError:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        print('Finished, output see {}'.format(fname))
        coord.request_stop()
        coord.join(threads)
        print('saving', OUTDIR+prefix)
        np.save(OUTDIR+prefix, np.asarray(detections))
        outfile.close()

x_tf = tf.constant(x)
y_tf = tf.constant(y)
t_tf = x_tf / y_tf
m_tf = tf.reduce_mean(t_tf)
v_tf = tf.reduce_mean((t_tf - m_tf)**2)
final = (t_tf - m_tf) / tf.sqrt(v_tf)


# In[36]:

print(sess.run(final))


# So now we want to go nan/inf hunting again in TensorFlow.

# In[41]:

print(sess.run(tf.reduce_any(tf.logical_or(tf.is_inf(t_tf), tf.is_nan(t_tf)))))
print(sess.run(tf.logical_or(tf.is_inf(t_tf), tf.is_nan(t_tf))))
# Or using the shorthand for tf.logical_or
# print(sess.run(tf.is_inf(t_tf) | tf.is_nan(t_tf)))


# I can still print known elements of Tensors, but conditionals will be challenging
# mid-way through the computation graph.

# In[42]:

print(sess.run(t_tf[1]))


# What we did in NumPy is not strictly possible in TensorFlow (this will throw a lot
# of errors). However, we can still use things like `tf.cond` and `tf.where` along
# with any of the `tf.reduce_*` operations.

# In[57]:

# sess.run(t_tf[tf.where(tf.is_inf(t_tf) | tf.is_nan(t_tf))])

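# A minimal sketch of the tf.where-based cleanup hinted at above: instead of
# indexing out the bad entries, replace them and reduce over the valid ones.
# The arrays x and y below are hypothetical stand-ins for whatever produced t_tf.
import numpy as np
import tensorflow as tf

x = np.array([1.0, 2.0, 0.0, 3.0])
y = np.array([2.0, 0.0, 0.0, 3.0])   # zeros here produce inf and nan in x / y

with tf.Session() as sess:
    t_tf = tf.constant(x) / tf.constant(y)
    bad = tf.is_inf(t_tf) | tf.is_nan(t_tf)
    t_clean = tf.where(bad, tf.zeros_like(t_tf), t_tf)
    # Mean over the valid entries only
    n_valid = tf.reduce_sum(tf.cast(tf.logical_not(bad), t_tf.dtype))
    mean_valid = tf.reduce_sum(t_clean) / n_valid
    print(sess.run([t_clean, mean_valid]))
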
def call(self, inputs):
    outputs = tf.where(tf.is_nan(inputs), K.zeros_like(inputs), inputs)
    return outputs

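# A hedged sketch of the kind of custom Keras layer a call() like the one above
# could belong to; the class name ReplaceNaN and the imports are assumptions
# (standalone Keras is shown, but tf.keras would work the same way).
import tensorflow as tf
from keras import backend as K
from keras.layers import Layer

class ReplaceNaN(Layer):
    """Pass-through layer that maps NaN entries to zero."""

    def call(self, inputs):
        return tf.where(tf.is_nan(inputs), K.zeros_like(inputs), inputs)

    def compute_output_shape(self, input_shape):
        return input_shape

# usage: insert anywhere in a Sequential/functional model, e.g. model.add(ReplaceNaN())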