def m_body(i, ta_tp, ta_fp, gmatch): # Jaccard score with groundtruth bboxes. rbbox = bboxes[i] jaccard = bboxes_jaccard(rbbox, gbboxes) jaccard = jaccard * tf.cast(tf.equal(glabels, rlabel), dtype=jaccard.dtype) # Best fit, checking it's above threshold. idxmax = tf.cast(tf.argmax(jaccard, axis=0), tf.int32) jcdmax = jaccard[idxmax] match = jcdmax > matching_threshold existing_match = gmatch[idxmax] not_difficult = tf.logical_not(gdifficults[idxmax]) # TP: match & no previous match and FP: previous match | no match. # If difficult: no record, i.e FP=False and TP=False. tp = tf.logical_and(not_difficult, tf.logical_and(match, tf.logical_not(existing_match))) ta_tp = ta_tp.write(i, tp) fp = tf.logical_and(not_difficult, tf.logical_or(existing_match, tf.logical_not(match))) ta_fp = ta_fp.write(i, fp) # Update grountruth match. mask = tf.logical_and(tf.equal(grange, idxmax), tf.logical_and(not_difficult, match)) gmatch = tf.logical_or(gmatch, mask) return [i+1, ta_tp, ta_fp, gmatch]
def tf_cheating_contcartpole(state, action): gravity = 9.8 masscart = 1.0 masspole = 0.1 total_mass = (masspole + masscart) length = 0.5 # actually half the pole's length polemass_length = (masspole * length) force_mag = 10.0 tau = 0.02 # seconds between state updates # Angle at which to fail the episode theta_threshold_radians = 12 * 2 * math.pi / 360 x_threshold = 2.4 x, x_dot, theta, theta_dot = tf.split(state, 4, axis=-1) done = tf.logical_or(x < -x_threshold, tf.logical_or(x > x_threshold, tf.logical_or(theta < -theta_threshold_radians, theta > theta_threshold_radians))) force = force_mag * action costheta = tf.cos(theta) sintheta = tf.sin(theta) temp = old_div((force + polemass_length * theta_dot * theta_dot * sintheta), total_mass) thetaacc = old_div((gravity * sintheta - costheta* temp), (length * (old_div(4.0,3.0) - masspole * costheta * costheta / total_mass))) xacc = temp - polemass_length * thetaacc * costheta / total_mass x = x + tau * x_dot x_dot = x_dot + tau * xacc theta = theta + tau * theta_dot theta_dot = theta_dot + tau * thetaacc state = tf.concat([x,x_dot,theta,theta_dot], -1) done = tf.squeeze(tf.cast(done, tf.float32), -1) reward = 1.0 - done done *= 0. return state, reward, done
def m_body(i, ta_tp, ta_fp, gmatch, n_ignored_det): # Jaccard score with groundtruth bboxes. rbbox = bboxes[i, :] # rbbox = tf.Print(rbbox, [rbbox]) jaccard = bboxes_jaccard(rbbox, gxs, gys) # Best fit, checking it's above threshold. idxmax = tf.cast(tf.argmax(jaccard, axis=0), dtype = tf.int32) jcdmax = jaccard[idxmax] match = jcdmax > matching_threshold existing_match = gmatch[idxmax] not_ignored = tf.logical_not(gignored[idxmax]) n_ignored_det = n_ignored_det + tf.cast(gignored[idxmax], tf.int32) # TP: match & no previous match and FP: previous match | no match. # If ignored: no record, i.e FP=False and TP=False. tp = tf.logical_and(not_ignored, tf.logical_and(match, tf.logical_not(existing_match))) ta_tp = ta_tp.write(i, tp) fp = tf.logical_and(not_ignored, tf.logical_or(existing_match, tf.logical_not(match))) ta_fp = ta_fp.write(i, fp) # Update grountruth match. mask = tf.logical_and(tf.equal(grange, idxmax), tf.logical_and(not_ignored, match)) gmatch = tf.logical_or(gmatch, mask) return [i+1, ta_tp, ta_fp, gmatch,n_ignored_det]
def accuracy(log, w1, w2, w3): with tf.name_scope('accuracy') as scope: c1 = tf.equal(tf.argmax(log, 1), tf.argmax(w1, 1)) c2 = tf.equal(tf.argmax(log, 1), tf.argmax(w2, 1)) c3 = tf.equal(tf.argmax(log, 1), tf.argmax(w3, 1)) correct_prediction = tf.logical_or(tf.logical_or(c1,c2),c3) accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) tf.scalar_summary("accuracy", accuracy) return accuracy
def set_zero_on_high_global_norm(self, grad, grad_norm_threshold, global_norm_tag=None): """ :param tf.Tensor grad: :param float grad_norm_threshold: :param str|None global_norm_tag: :rtype: tf.Tensor """ norm = self.get_global_grad_norm(tag=global_norm_tag) # Also check nan/inf. Treat them as if we would have been over grad_norm_threshold. zero_cond = tf.logical_or(tf.is_nan(norm), tf.is_inf(norm)) zero_cond = tf.logical_or(zero_cond, tf.greater(norm, grad_norm_threshold)) return tf.where(zero_cond, tf.zeros_like(grad), grad)
def _inverse_log_det_jacobian(self, y, use_saved_statistics=False): if not self.batchnorm.built: # Create variables. self.batchnorm.build(y.shape) event_dims = self.batchnorm.axis reduction_axes = [i for i in range(len(y.shape)) if i not in event_dims] # At training-time, ildj is computed from the mean and log-variance across # the current minibatch. # We use multiplication instead of tf.where() to get easier broadcasting. use_saved_statistics = tf.cast( tf.logical_or(use_saved_statistics, tf.logical_not(self._training)), tf.float32) log_variance = tf.log( (1 - use_saved_statistics) * tf.nn.moments(y, axes=reduction_axes, keep_dims=True)[1] + use_saved_statistics * self.batchnorm.moving_variance + self.batchnorm.epsilon) # `gamma` and `log Var(y)` reductions over event_dims. # Log(total change in area from gamma term). log_total_gamma = tf.reduce_sum(tf.log(self.batchnorm.gamma)) # Log(total change in area from log-variance term). log_total_variance = tf.reduce_sum(log_variance) # The ildj is scalar, as it does not depend on the values of x and are # constant across minibatch elements. return log_total_gamma - 0.5 * log_total_variance
def _get_values_from_start_and_end(self, input_tensor, num_start_samples, num_end_samples, total_num_samples): """slices num_start_samples and last num_end_samples from input_tensor. Args: input_tensor: An int32 tensor of shape [N] to be sliced. num_start_samples: Number of examples to be sliced from the beginning of the input tensor. num_end_samples: Number of examples to be sliced from the end of the input tensor. total_num_samples: Sum of is num_start_samples and num_end_samples. This should be a scalar. Returns: A tensor containing the first num_start_samples and last num_end_samples from input_tensor. """ input_length = tf.shape(input_tensor)[0] start_positions = tf.less(tf.range(input_length), num_start_samples) end_positions = tf.greater_equal( tf.range(input_length), input_length - num_end_samples) selected_positions = tf.logical_or(start_positions, end_positions) selected_positions = tf.cast(selected_positions, tf.int32) indexed_positions = tf.multiply(tf.cumsum(selected_positions), selected_positions) one_hot_selector = tf.one_hot(indexed_positions - 1, total_num_samples, dtype=tf.int32) return tf.tensordot(input_tensor, one_hot_selector, axes=[0, 0])
def body_infer(time, inputs, caches, outputs_tas, finished, log_probs, lengths, bs_stat_ta, predicted_ids): """Internal while_loop body. Args: time: Scalar int32 Tensor. inputs: A list of inputs Tensors. caches: A dict of decoder states. outputs_tas: A list of TensorArrays. finished: A bool tensor (keeping track of what's finished). log_probs: The log probability Tensor. lengths: The decoding length Tensor. bs_stat_ta: structure of TensorArray. predicted_ids: A Tensor. Returns: `(time + 1, next_inputs, next_caches, next_outputs_tas, next_finished, next_log_probs, next_lengths, next_infer_status_ta)`. """ # step decoder def _decoding(_decoder, _input, _cache, _decoder_output_remover, _outputs_ta, _outputs_to_logits_fn): with tf.variable_scope(_decoder.name): _output, _next_cache = _decoder.step(_input, _cache) _decoder_top_features = _decoder.merge_top_features(_output) _ta = nest.map_structure(lambda _ta_ms, _output_ms: _ta_ms.write(time, _output_ms), _outputs_ta, _decoder_output_remover.apply(_output)) _logit = _outputs_to_logits_fn(_decoder_top_features) return _output, _next_cache, _ta, _logit outputs, next_caches, next_outputs_tas, logits = repeat_n_times( num_models, _decoding, decoders, inputs, caches, decoder_output_removers, outputs_tas, outputs_to_logits_fns) # sample next symbols sample_ids, beam_ids, next_log_probs, next_lengths \ = helper.sample_symbols(logits, log_probs, finished, lengths, time=time) for c in next_caches: c["decoding_states"] = gather_states(c["decoding_states"], beam_ids) infer_status = BeamSearchStateSpec( log_probs=next_log_probs, beam_ids=beam_ids) bs_stat_ta = nest.map_structure(lambda ta, out: ta.write(time, out), bs_stat_ta, infer_status) predicted_ids = gather_states(tf.reshape(predicted_ids, [-1, time + 1]), beam_ids) next_predicted_ids = tf.concat([predicted_ids, tf.expand_dims(sample_ids, axis=1)], axis=1) next_predicted_ids = tf.reshape(next_predicted_ids, [-1]) next_predicted_ids.set_shape([None]) next_finished, next_input_symbols = helper.next_symbols(time=time, sample_ids=sample_ids) next_inputs = repeat_n_times(num_models, target_to_embedding_fns, next_input_symbols, time + 1) next_finished = tf.logical_or(next_finished, finished) return time + 1, next_inputs, next_caches, next_outputs_tas, \ next_finished, next_log_probs, next_lengths, bs_stat_ta, \ next_predicted_ids
def set_logp_to_neg_inf(X, logp, bounds): """Set `logp` to negative infinity when `X` is outside the allowed bounds. # Arguments X: tensorflow.Tensor The variable to apply the bounds to logp: tensorflow.Tensor The log probability corrosponding to `X` bounds: list of `Region` objects The regions corrosponding to allowed regions of `X` # Returns logp: tensorflow.Tensor The newly bounded log probability """ conditions = [] for l, u in bounds: lower_is_neg_inf = not isinstance(l, tf.Tensor) and np.isneginf(l) upper_is_pos_inf = not isinstance(u, tf.Tensor) and np.isposinf(u) if not lower_is_neg_inf and upper_is_pos_inf: conditions.append(tf.greater(X, l)) elif lower_is_neg_inf and not upper_is_pos_inf: conditions.append(tf.less(X, u)) elif not (lower_is_neg_inf or upper_is_pos_inf): conditions.append(tf.logical_and(tf.greater(X, l), tf.less(X, u))) if len(conditions) > 0: is_inside_bounds = conditions[0] for condition in conditions[1:]: is_inside_bounds = tf.logical_or(is_inside_bounds, condition) logp = tf.select(is_inside_bounds, logp, tf.fill(tf.shape(X), config.dtype(-np.inf))) return logp
def not_done_step(a, _): reward, done = self._batch_env.simulate(action) with tf.control_dependencies([reward, done]): r0 = self._batch_env.observ r1 = tf.add(a[1], reward) r2 = tf.logical_or(a[2], done) return (r0, r1, r2)
def update_op(self, has_nan, amax): def overflow_case(): new_scale_val = tf.clip_by_value(self.scale / self.step_factor, self.scale_min, self.scale_max) scale_assign = tf.assign(self.scale, new_scale_val) overflow_iter_assign = tf.assign(self.last_overflow_iteration, self.iteration) with tf.control_dependencies([scale_assign, overflow_iter_assign]): return tf.identity(self.scale) def scale_case(): since_overflow = self.iteration - self.last_overflow_iteration should_update = tf.equal(since_overflow % self.step_window, 0) def scale_update_fn(): new_scale_val = tf.clip_by_value(self.scale * self.step_factor, self.scale_min, self.scale_max) return tf.assign(self.scale, new_scale_val) return tf.cond(should_update, scale_update_fn, lambda: self.scale) iter_update = tf.assign_add(self.iteration, 1) overflow = tf.logical_or(has_nan, tf.is_inf(amax)) update_op = tf.cond(overflow, overflow_case, scale_case) with tf.control_dependencies([update_op]): return tf.identity(iter_update)
def apply_gradients(self, grads_and_vars, global_step=None, name=None): def apply_ops_wrapper(): update_op = self._optimizer.apply_gradients(grads_and_vars, global_step, name) apply_ops = [] with tf.control_dependencies([update_op]): for grad, var in grads_and_vars: if var.name in self._fp32_to_fp16: dst_var = self._fp32_to_fp16[var.name] apply_ops.append( tf.assign(dst_var, tf.saturate_cast(var, tf.float16))) if apply_ops: return tf.group(apply_ops) return update_op if self._loss_scaler: grad_has_nans, grad_amax = AutomaticLossScaler.check_grads(grads_and_vars) should_skip_update = tf.logical_or(tf.is_inf(grad_amax), grad_has_nans) loss_scale_update_op = self._loss_scaler.update_op(grad_has_nans, grad_amax) with tf.control_dependencies([loss_scale_update_op]): return tf.cond(should_skip_update, tf.no_op, apply_ops_wrapper) else: return apply_ops_wrapper()
def update_op(self, has_nan, amax): is_nonfinite = tf.logical_or(has_nan, tf.is_inf(amax)) x = tf.cond(is_nonfinite, lambda: tf.pow(2., self.log_max), lambda: tf.log(amax) / tf.log(tf.constant(2.))) x_hat_assn = tf.assign(self.x_hat, self.beta1 * self.x_hat + (1 - self.beta1) * x) b1_corr_assn = tf.assign(self.b1_correction, self.b1_correction * self.beta1) with tf.control_dependencies([x_hat_assn, b1_corr_assn]): mu = self.x_hat.read_value() / (1 - self.b1_correction.read_value()) slow_x_hat_assn = tf.assign(self.slow_x_hat, self.beta2 * self.slow_x_hat + (1 - self.beta2) * x) xsquared_hat_assn = tf.assign(self.xsquared_hat, self.beta2 * self.xsquared_hat + (1 - self.beta2) * (x * x)) b2_corr_assn = tf.assign(self.b2_correction, self.b2_correction * self.beta2) with tf.control_dependencies([slow_x_hat_assn, xsquared_hat_assn, b2_corr_assn]): e_xsquared = self.xsquared_hat.read_value() / (1 - self.b2_correction.read_value()) slow_mu = self.slow_x_hat.read_value() / (1 - self.b2_correction.read_value()) sigma2 = e_xsquared - (slow_mu * slow_mu) sigma = tf.sqrt(tf.maximum(sigma2, tf.constant(0.))) log_cutoff = sigma * self.overflow_std_dev + mu log_difference = 16 - log_cutoff proposed_scale = tf.pow(2., log_difference) scale_update = tf.assign(self.scale, tf.clip_by_value(proposed_scale, self.scale_min, self.scale_max)) iter_update = tf.assign_add(self.iteration, 1) with tf.control_dependencies([scale_update]): return tf.identity(iter_update)
def add_dyprune(weights): crate = config.crate[weights.name[:-2]] #hyperpara C rate prune_mask = tf.Variable(tf.ones_like(weights),name=weights.name[:-2]+'mask', trainable=False) #calculate mask mean = tf.divide(tf.reduce_sum(tf.multiply(tf.abs(weights),prune_mask)),tf.reduce_sum(prune_mask)) var = tf.multiply(weights,prune_mask) var = tf.square(var) mean_q = tf.square(mean)*tf.reduce_sum(prune_mask) var = tf.reduce_sum(var) - mean_q var = tf.divide(var,tf.reduce_sum(prune_mask)) var = tf.sqrt(var) t1_lower = (mean+var*crate)*0.25 #hyperpara a t1_upper = (mean+var*crate)*0.45 #hyperpara b indicator_lower1 = tf.greater_equal(tf.abs(weights), tf.ones_like(weights) * t1_lower) indicator_upper1 = tf.greater_equal(tf.abs(weights), tf.ones_like(weights) * t1_upper) indicator_matrix1 = tf.greater_equal(prune_mask, tf.zeros_like(weights)) indicator_matrix1 = tf.logical_and(indicator_matrix1,indicator_lower1) indicator_matrix1 = tf.logical_or(indicator_matrix1,indicator_upper1) indicator_matrix1 = tf.to_float(indicator_matrix1) update = prune_mask.assign(indicator_matrix1) prune_fc = tf.multiply(weights, prune_mask) return prune_fc
def termination_condition(self, state): char_idx = tf.cast(tf.argmax(state.phi, axis=1), tf.int32) final_char = char_idx >= self.attention_values_lengths - 1 past_final_char = char_idx >= self.attention_values_lengths output = self.output_function(state) es = tf.cast(output[:, 2], tf.int32) is_eos = tf.equal(es, np.ones_like(es)) return tf.logical_or(tf.logical_and(final_char, is_eos), past_final_char)
def subsample(self, indicator, batch_size, labels, scope=None): """Returns subsampled minibatch. Args: indicator: boolean tensor of shape [N] whose True entries can be sampled. batch_size: desired batch size. If None, keeps all positive samples and randomly selects negative samples so that the positive sample fraction matches self._positive_fraction. It cannot be None is is_static is True. labels: boolean tensor of shape [N] denoting positive(=True) and negative (=False) examples. scope: name scope. Returns: sampled_idx_indicator: boolean tensor of shape [N], True for entries which are sampled. Raises: ValueError: if labels and indicator are not 1D boolean tensors. """ if len(indicator.get_shape().as_list()) != 1: raise ValueError('indicator must be 1 dimensional, got a tensor of ' 'shape %s' % indicator.get_shape()) if len(labels.get_shape().as_list()) != 1: raise ValueError('labels must be 1 dimensional, got a tensor of ' 'shape %s' % labels.get_shape()) if labels.dtype != tf.bool: raise ValueError('labels should be of type bool. Received: %s' % labels.dtype) if indicator.dtype != tf.bool: raise ValueError('indicator should be of type bool. Received: %s' % indicator.dtype) with tf.name_scope(scope, 'BalancedPositiveNegativeSampler'): if self._is_static: return self._static_subsample(indicator, batch_size, labels) else: # Only sample from indicated samples negative_idx = tf.logical_not(labels) positive_idx = tf.logical_and(labels, indicator) negative_idx = tf.logical_and(negative_idx, indicator) # Sample positive and negative samples separately if batch_size is None: max_num_pos = tf.reduce_sum(tf.to_int32(positive_idx)) else: max_num_pos = int(self._positive_fraction * batch_size) sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos) num_sampled_pos = tf.reduce_sum(tf.cast(sampled_pos_idx, tf.int32)) if batch_size is None: negative_positive_ratio = ( 1 - self._positive_fraction) / self._positive_fraction max_num_neg = tf.to_int32( negative_positive_ratio * tf.to_float(num_sampled_pos)) else: max_num_neg = batch_size - num_sampled_pos sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg) return tf.logical_or(sampled_pos_idx, sampled_neg_idx)
def simulate(self, action): with tf.name_scope("environment/simulate"): reward, done = self._batch_env.simulate(action) with tf.control_dependencies([reward, done]): new_done = tf.logical_or(done, self._time_elapsed > self.timelimit) inc = self._time_elapsed.assign_add(tf.ones_like(self._time_elapsed)) with tf.control_dependencies([inc]): return tf.identity(reward), tf.identity(new_done)
def integral(lower, upper): return tf.cond( tf.logical_or( tf.is_inf(tf.cast(lower, config.dtype)), tf.is_inf(tf.cast(upper, config.dtype)) ), lambda: tf.constant(1, dtype=config.dtype), lambda: tf.cast(upper, config.dtype) - tf.cast(lower, config.dtype), )
def _log_prob(self, x): log_prob = -(0.5 * tf.square((x - self.loc) / self.scale) + 0.5 * np.log(2. * np.pi) + tf.log(self.scale * self._normalizer)) # p(x) is 0 outside the bounds. neg_inf = tf.log(tf.zeros_like(log_prob)) bounded_log_prob = tf.where(tf.logical_or(tf.greater(x, self._high), tf.less(x, self._low)), neg_inf, log_prob) return bounded_log_prob
def detectMinVal(input_mat, var, threshold=1e-6, name='', debug=False): eigen_min = tf.reduce_min(input_mat) eigen_max = tf.reduce_max(input_mat) eigen_ratio = eigen_max / eigen_min input_mat_clipped = clipoutNeg(input_mat, threshold) if debug: input_mat_clipped = tf.cond(tf.logical_or(tf.greater(eigen_ratio, 0.), tf.less(eigen_ratio, -500)), lambda: input_mat_clipped, lambda: tf.Print( input_mat_clipped, [tf.convert_to_tensor('screwed ratio ' + name + ' eigen values!!!'), tf.convert_to_tensor(var.name), eigen_min, eigen_max, eigen_ratio])) return input_mat_clipped
def _prob(self, x): broadcasted_x = x * tf.ones( self.batch_shape_tensor(), dtype=x.dtype) return tf.where( tf.is_nan(broadcasted_x), broadcasted_x, tf.where( tf.logical_or(broadcasted_x < self.low, # This > is only sound for continuous uniform broadcasted_x > self.high), tf.zeros_like(broadcasted_x), tf.ones_like(broadcasted_x) / self.range()))
def _log_prob_single(tensor): stddev = tf.sqrt(scale_factor / calculate_variance_factor(tensor.shape, mode)) z = (tensor - mean) / stddev log_prob_z = - (z ** 2 + tf.log(2 * pi)) / 2 log_prob = tf.reduce_sum(log_prob_z) if truncated: from numpy import inf log_prob -= tf.log(TRUNCATED_NORMALIZER) invalid = tf.logical_or(tf.less_equal(z, -2), tf.greater_equal(z, 2)) log_prob = tf.where(invalid, -inf, log_prob) # Return negative as this is a regularizer return - log_prob
def _lower_bound_grad(op, grad): """Gradient for `lower_bound` if `gradient == 'identity_if_towards'`. Args: op: The op for which to calculate a gradient. grad: Gradient with respect to the output of the op. Returns: Gradient with respect to the inputs of the op. """ inputs, bound = op.inputs pass_through_if = tf.logical_or(inputs >= bound, grad < 0) return [tf.cast(pass_through_if, grad.dtype) * grad, None]
def _log_prob_single(tensor): stddev = tf.sqrt(scale_factor / calculate_variance_factor(tensor.shape, mode)) z1 = (tensor - mean - 3 * stddev) / stddev log_prob_z1 = - (z1 ** 2 + tf.log(2 * pi)) / 2 log_prob1 = tf.reduce_sum(log_prob_z1) z2 = (tensor - mean + 3 * stddev) / stddev log_prob_z2 = - (z2 ** 2 + tf.log(2 * pi)) / 2 log_prob2 = tf.reduce_sum(log_prob_z2) if truncated: from numpy import inf log_prob1 -= tf.log(TRUNCATED_NORMALIZER) invalid = tf.logical_or(tf.less_equal(z1, -2), tf.greater_equal(z1, 2)) log_prob1 = tf.where(invalid, -inf, log_prob1) log_prob2 -= tf.log(TRUNCATED_NORMALIZER) invalid = tf.logical_or(tf.less_equal(z2, -2), tf.greater_equal(z2, 2)) log_prob2 = tf.where(invalid, -inf, log_prob2) # Return negative as this is a regularizer m = tf.maximum(log_prob1, log_prob2) - tf.log(2.0) log_prob1 -= m log_prob2 -= m log_prob = m + tf.log(tf.exp(log_prob1) + tf.exp(log_prob2)) return - log_prob
def _subsample_selection_to_desired_neg_pos_ratio(self, indices, match, max_negatives_per_positive, min_negatives_per_image=0): """Subsample a collection of selected indices to a desired neg:pos ratio. This function takes a subset of M indices (indexing into a large anchor collection of N anchors where M<N) which are labeled as positive/negative via a Match object (matched indices are positive, unmatched indices are negative). It returns a subset of the provided indices retaining all positives as well as up to the first K negatives, where: K=floor(num_negative_per_positive * num_positives). For example, if indices=[2, 4, 5, 7, 9, 10] (indexing into 12 anchors), with positives=[2, 5] and negatives=[4, 7, 9, 10] and num_negatives_per_positive=1, then the returned subset of indices is [2, 4, 5, 7]. Args: indices: An integer tensor of shape [M] representing a collection of selected anchor indices match: A matcher.Match object encoding the match between anchors and groundtruth boxes for a given image, with rows of the Match objects corresponding to groundtruth boxes and columns corresponding to anchors. max_negatives_per_positive: (float) maximum number of negatives for each positive anchor. min_negatives_per_image: minimum number of negative anchors for a given image. Allow sampling negatives in image without any positive anchors. Returns: selected_indices: An integer tensor of shape [M'] representing a collection of selected anchor indices with M' <= M. num_positives: An integer tensor representing the number of positive examples in selected set of indices. num_negatives: An integer tensor representing the number of negative examples in selected set of indices. """ positives_indicator = tf.gather(match.matched_column_indicator(), indices) negatives_indicator = tf.gather(match.unmatched_column_indicator(), indices) num_positives = tf.reduce_sum(tf.to_int32(positives_indicator)) max_negatives = tf.maximum(min_negatives_per_image, tf.to_int32(max_negatives_per_positive * tf.to_float(num_positives))) topk_negatives_indicator = tf.less_equal( tf.cumsum(tf.to_int32(negatives_indicator)), max_negatives) subsampled_selection_indices = tf.where( tf.logical_or(positives_indicator, topk_negatives_indicator)) num_negatives = tf.size(subsampled_selection_indices) - num_positives return (tf.reshape(tf.gather(indices, subsampled_selection_indices), [-1]), num_positives, num_negatives)
def maybe_split_sequence_lengths(sequence_length, num_splits, total_length): """Validates and splits `sequence_length`, if necessary. Returned value must be used in graph for all validations to be executed. Args: sequence_length: A batch of sequence lengths, either sized `[batch_size]` and equal to either 0 or `total_length`, or sized `[batch_size, num_splits]`. num_splits: The scalar number of splits of the full sequences. total_length: The scalar total sequence length (potentially padded). Returns: sequence_length: If input shape was `[batch_size, num_splits]`, returns the same Tensor. Otherwise, returns a Tensor of that shape with each input length in the batch divided by `num_splits`. Raises: ValueError: If `sequence_length` is not shaped `[batch_size]` or `[batch_size, num_splits]`. tf.errors.InvalidArgumentError: If `sequence_length` is shaped `[batch_size]` and all values are not either 0 or `total_length`. """ if sequence_length.shape.ndims == 1: if total_length % num_splits != 0: raise ValueError( '`total_length` must be evenly divisible by `num_splits`.') with tf.control_dependencies( [tf.Assert( tf.reduce_all( tf.logical_or(tf.equal(sequence_length, 0), tf.equal(sequence_length, total_length))), data=[sequence_length])]): sequence_length = ( tf.tile(tf.expand_dims(sequence_length, axis=1), [1, num_splits]) // num_splits) elif sequence_length.shape.ndims == 2: with tf.control_dependencies([ tf.assert_less_equal( sequence_length, tf.constant(total_length // num_splits, tf.int32), message='Segment length cannot be more than ' '`total_length / num_splits`.')]): sequence_length = tf.identity(sequence_length) sequence_length.set_shape([sequence_length.shape[0], num_splits]) else: raise ValueError( 'Sequence lengths must be given as a vector or a 2D Tensor whose ' 'second dimension size matches its initial hierarchical split. Got ' 'shape: %s' % sequence_length.shape.as_list()) return sequence_length
def _body(step, finished, inputs, lengths, log_probs, cache): inputs_lengths = tf.add(lengths, 1 - tf.cast(finished, lengths.dtype)) logits, cache = symbols_to_logits_fn(inputs, step, cache) probs = tf.nn.log_softmax(logits) sample_ids = tf.argmax(probs, axis=-1) # Accumulate log probabilities. sample_probs = tf.reduce_max(probs, axis=-1) masked_probs = tf.squeeze(sample_probs, -1) * (1.0 - tf.cast(finished, sample_probs.dtype)) log_probs = tf.add(log_probs, masked_probs) next_inputs = tf.concat([inputs, tf.cast(sample_ids, inputs.dtype)], -1) next_lengths = inputs_lengths next_finished = tf.logical_or( finished, tf.equal(tf.squeeze(sample_ids, axis=[1]), end_token)) step = step + 1 if maximum_iterations is not None: next_finished = tf.logical_or(next_finished, step >= maximum_iterations) return step, next_finished, next_inputs, next_lengths, log_probs, cache
def recall(self, y_): y_true = tf.cast(tf.argmin(y_, 1), tf.bool) y_pred = tf.cast(tf.argmin(self.y, 1), tf.bool) # 1 stands for positive, 0 stands for negative tp = tf.reduce_sum(tf.cast(tf.logical_and(y_true, y_pred), tf.float32)) tn = tf.reduce_sum(tf.cast(tf.logical_not(tf.logical_or(y_true, y_pred)), tf.float32)) p = tf.reduce_sum(tf.cast(y_true, tf.float32)) n = tf.reduce_sum(tf.cast(tf.logical_not(y_true), tf.float32)) fp = p - tp fn = n - tn # t = tf.add(tp, tn) # f = tf.add(fp, fn) relevant = tf.add(tp, fn) recall = tf.div(tp, relevant) return recall
def next_symbols(self, time, sample_ids): """ Returns the output at `time`, also known as the input at `time`+1. Args: time: A int32 Scalar, the current time. sample_ids: A Tensor with shape [batch_size, ], returned by `sample_symbols()`. Returns: A tuple `(finished, next_symbols)`, where `finished` indicates whether each sequence is finished, and `next_symbols` is the next input Tensor with shape [batch_size * beam_size, ] """ next_time = time + 1 finished = tf.logical_or((next_time >= self._maximum_labels_length), tf.equal(self._vocab.eos_id, sample_ids)) return finished, sample_ids
def __init__(self, regularizers_to_group): """Creates an instance. Args: regularizers_to_group: A list of generic_regularizers.OpRegularizer objects.Their regularization_vector (alive_vector) are expected to be of the same length. Raises: ValueError: regularizers_to_group is not of length 2 (TODO: support arbitrary length if needed. """ _raise_if_length_is_not2(regularizers_to_group) self._regularization_vector = tf.maximum( regularizers_to_group[0].regularization_vector, regularizers_to_group[1].regularization_vector) self._alive_vector = tf.logical_or(regularizers_to_group[0].alive_vector, regularizers_to_group[1].alive_vector)
def _online_sample_masks(FLAGS, inputs, tgt_len, num_predict, boundary=None, stride=1): """Sample target positions to predict.""" # Set the number of tokens to mask out per example input_mask = tf.cast(tf.not_equal(inputs, FLAGS.pad_id), dtype=tf.int64) num_tokens = tf.cast(tf.reduce_sum(input_mask, -1), tf.float32) # global_step = tf.train.get_or_create_global_step() # mask_prob = tf.train.polynomial_decay( # FLAGS.initial_ratio, # global_step, # int(FLAGS.num_train_steps*0.1), # end_learning_rate=FLAGS.final_ratio, # power=1.0, # cycle=True) mask_prob = FLAGS.final_ratio tf.logging.info("mask_prob: `%s`.", mask_prob) num_predict = tf.maximum( 1, tf.minimum(num_predict, tf.cast(tf.round(num_tokens * mask_prob), tf.int32))) num_predict = tf.cast(num_predict, tf.int32) tf.logging.info("Online sample with strategy: `%s`.", FLAGS.sample_strategy) if FLAGS.sample_strategy == "single_token": return _single_token_mask(inputs, tgt_len, num_predict) else: if FLAGS.sample_strategy == "whole_word": assert boundary is not None, "whole word sampling requires `boundary`" is_target, target_mask = _whole_word_mask(FLAGS, inputs, tgt_len, num_predict, boundary) elif FLAGS.sample_strategy == "token_span": is_target, target_mask = _token_span_mask(FLAGS, inputs, tgt_len, num_predict, stride=stride) elif FLAGS.sample_strategy == "word_span": assert boundary is not None, "word span sampling requires `boundary`" is_target, target_mask = _word_span_mask(FLAGS, inputs, tgt_len, num_predict, boundary, stride=stride) else: raise NotImplementedError valid_mask = tf.not_equal(inputs, FLAGS.pad_id) is_target = tf.logical_and(valid_mask, is_target) target_mask = target_mask * tf.cast(valid_mask, tf.float32) # Fill in single tokens if not full cur_num_masked = tf.reduce_sum(tf.cast(is_target, tf.int32)) extra_mask, extra_tgt_mask = _single_token_mask( FLAGS, inputs, tgt_len, num_predict - cur_num_masked, is_target) return tf.logical_or(is_target, extra_mask), target_mask + extra_tgt_mask
def not_time_0(): next_cell_state = cell_state # find predicted_ids values_list = [] indices_list = [] for i in range(beam_width): score = tf.add(tf.matmul(cell_output[i], weight_score), bias_score) softmax = tf.nn.softmax(score) log_prob = tf.log(softmax) values, indices = tf.nn.top_k( log_prob, beam_width, sorted=True) # [batch, beam], [batch, beam] # Note: indices is ids of words as well values = tf.add(values, tf.expand_dims( log_probs[:, i], -1)) # sum with previous log_prob values_list.append(values) indices_list.append(indices) concat_vlist = tf.concat( tf.unstack(values_list, axis=0), axis=-1) # [batch_size, beam_width*beam_width] concat_ilist = tf.concat(tf.unstack(indices_list, axis=0), axis=-1) top_values, index_in_vlist = tf.nn.top_k( concat_vlist, beam_width, sorted=True) # [batch_size, beam_width] # Note: in tf.nn.top_k, if sorted=False then it's values will be SORTED ASCENDING predicted_ids = get_word_ids(index_in_vlist, concat_ilist, batch_size) predicted_ids = tf.stack( predicted_ids) # [batch_size, beam_width] # new_beam_finished = tf.logical_or(tf.equal(predicted_ids, eos_vocab_id), beam_finished) # find parent_ids that match word_ids_to_add parent_indexs = index_in_vlist // beam_width # find new_log_probs new_log_probs = top_values # shift top-k according to beam_finished # which means we will shift predicted_ids, new_log_probs, parent_indexs def shift(tensor_1D, num_shift, vacancy_value): """ shift from left to right """ shift_value = tensor_1D[:beam_width - num_shift] fill_vacancy = tf.fill([num_shift], vacancy_value) return tf.concat([fill_vacancy, shift_value], axis=0) ids_arr = [] probs_arr = [] parents_arr = [] num_shifts = tf.map_fn( lambda beam: tf.reduce_sum(tf.cast(beam, tf.int32)), beam_finished, dtype=tf.int32) # Note: we don't shift using new_beam_finished to avoid newly finish # which will update -inf to final_log_probs for i in range(batch_size): num_shift = num_shifts[i] ids_arr.append( shift(predicted_ids[i], num_shift, eos_vocab_id)) probs_arr.append( shift(new_log_probs[i], num_shift, -np.inf)) parents_arr.append( shift(parent_indexs[i], num_shift, -1)) valid_shape = tf.shape(beam_finished) predicted_ids = tf.stack(ids_arr) predicted_ids = tf.reshape(predicted_ids, valid_shape) new_log_probs = tf.stack(probs_arr) new_log_probs = tf.reshape(new_log_probs, valid_shape) parent_indexs = tf.stack(parents_arr) parent_indexs = tf.reshape(parent_indexs, valid_shape) new_beam_finished = tf.logical_or( tf.equal(predicted_ids, eos_vocab_id), beam_finished) # define next_input finished = tf.reduce_all(elements_finished) next_input = tuple( tf.cond( finished, lambda: tf.nn.embedding_lookup( embedding_tgt, [eos_vocab_id] * batch_size), lambda: tf.nn.embedding_lookup( embedding_tgt, predicted_ids[:, i])) for i in range(beam_width)) return elements_finished, next_input, next_cell_state, predicted_ids, new_log_probs, new_beam_finished, parent_indexs
def run_step( time, max_seq_len, tgt_in_seq_ids, tgt_in_seq_len, predict_ta, n_queue_ta, q_start_index, q_end_index, hidden_state, sos_id, non_terminal_id, left_bracket_id, ): cur_id = tgt_in_seq_ids[time] cur_embed = tf.reshape(tf.nn.embedding_lookup( self.tgt_embeddings, cur_id), shape=[1, -1]) def true_fn(q_start_index, n_queue_ta, hidden_state): state = n_queue_ta.read(q_start_index) q_start_index = q_start_index + 1 return ((state[0][:][:], state[1][:][:]), q_start_index, n_queue_ta) def false_fn(q_start_index, n_queue_ta, hidden_state): return (hidden_state, q_start_index, n_queue_ta) condition = tf.logical_and( tf.logical_or(tf.equal(cur_id, sos_id), tf.equal(cur_id, left_bracket_id)), tf.less(q_start_index, q_end_index)) pre_state, q_start_index, n_queue_ta = tf.cond( condition, true_fn=lambda: true_fn(q_start_index, n_queue_ta, hidden_state), false_fn=lambda: false_fn(q_start_index, n_queue_ta, hidden_state)) call_cell = lambda: cell(cur_embed, pre_state) def output_state_false_fn(pre_state): return (create_zero_array(shape=[1, cell.output_size], dtype=tf.float32), pre_state) new_output, new_state = tf.cond( tf.less(time, tgt_in_seq_len), true_fn=call_cell, false_fn=lambda: output_state_false_fn(pre_state)) logit = dense(new_output) print('logit:', logit) output_id = tf.reshape(tf.cast(tf.argmax(logit, axis=-1), dtype=tf.int32), shape=()) new_output = tf.reshape(new_output, shape=[cell.output_size]) predict_ta = predict_ta.write(time, new_output) def true_fn1(state, n_queue_ta, q_end_index): n_queue_ta = n_queue_ta.write(q_end_index, state) q_end_index = q_end_index + 1 return (q_end_index, n_queue_ta) def false_fn1(q_end_index, n_queue_ta): return (q_end_index, n_queue_ta) q_end_index, n_queue_ta = tf.cond( tf.equal(output_id, non_terminal_id), true_fn=lambda: true_fn1(new_state, n_queue_ta, q_end_index ), false_fn=lambda: false_fn1(q_end_index, n_queue_ta)) return (time + 1, max_seq_len, tgt_in_seq_ids, tgt_in_seq_len, predict_ta, n_queue_ta, q_start_index, q_end_index, new_state, sos_id, non_terminal_id, left_bracket_id)
def define_collect(policy_factory, batch_env, hparams, eval_phase): """Collect trajectories.""" eval_phase = tf.convert_to_tensor(eval_phase) memory_shape = [hparams.epoch_length ] + [batch_env.observ.shape.as_list()[0]] memories_shapes_and_types = [ # observation (memory_shape + batch_env.observ.shape.as_list()[1:], tf.float32), (memory_shape, tf.float32), # reward (memory_shape, tf.bool), # done # action (memory_shape + batch_env.action_shape, batch_env.action_dtype), (memory_shape, tf.float32), # pdf (memory_shape, tf.float32), # value function ] memory = [ tf.Variable(tf.zeros(shape, dtype), trainable=False) for (shape, dtype) in memories_shapes_and_types ] cumulative_rewards = tf.get_variable("cumulative_rewards", len(batch_env), trainable=False) should_reset_var = tf.Variable(True, trainable=False) def group(): return tf.group( batch_env.reset(tf.range(len(batch_env))), tf.assign(cumulative_rewards, tf.zeros(len(batch_env)))) reset_op = tf.cond(tf.logical_or(should_reset_var, eval_phase), group, tf.no_op) with tf.control_dependencies([reset_op]): reset_once_op = tf.assign(should_reset_var, False) with tf.control_dependencies([reset_once_op]): def step(index, scores_sum, scores_num): """Single step.""" index %= hparams.epoch_length # Only needed in eval runs. # Note - the only way to ensure making a copy of tensor is to run simple # operation. We are waiting for tf.copy: # https://github.com/tensorflow/tensorflow/issues/11186 obs_copy = batch_env.observ + 0 actor_critic = policy_factory(tf.expand_dims(obs_copy, 0)) policy = actor_critic.policy action = tf.cond(eval_phase, policy.mode, policy.sample) postprocessed_action = actor_critic.action_postprocessing(action) simulate_output = batch_env.simulate(postprocessed_action[0, ...]) pdf = policy.prob(action)[0] with tf.control_dependencies(simulate_output): reward, done = simulate_output done = tf.reshape(done, (len(batch_env), )) to_save = [ obs_copy, reward, done, action[0, ...], pdf, actor_critic.value[0] ] save_ops = [ tf.scatter_update(memory_slot, index, value) for memory_slot, value in zip(memory, to_save) ] cumulate_rewards_op = cumulative_rewards.assign_add(reward) agent_indices_to_reset = tf.where(done)[:, 0] with tf.control_dependencies([cumulate_rewards_op]): scores_sum_delta = tf.reduce_sum( tf.gather(cumulative_rewards, agent_indices_to_reset)) scores_num_delta = tf.count_nonzero(done, dtype=tf.int32) with tf.control_dependencies(save_ops + [scores_sum_delta, scores_num_delta]): reset_env_op = batch_env.reset(agent_indices_to_reset) reset_cumulative_rewards_op = tf.scatter_update( cumulative_rewards, agent_indices_to_reset, tf.zeros(tf.shape(agent_indices_to_reset))) with tf.control_dependencies( [reset_env_op, reset_cumulative_rewards_op]): return [ index + 1, scores_sum + scores_sum_delta, scores_num + scores_num_delta ] def stop_condition(i, _, resets): return tf.cond(eval_phase, lambda: resets < hparams.num_eval_agents, lambda: i < hparams.epoch_length) init = [tf.constant(0), tf.constant(0.0), tf.constant(0)] index, scores_sum, scores_num = tf.while_loop(stop_condition, step, init, parallel_iterations=1, back_prop=False) mean_score = tf.cond(tf.greater(scores_num, 0), lambda: scores_sum / tf.cast(scores_num, tf.float32), lambda: 0.) 
printing = tf.Print(0, [mean_score, scores_sum, scores_num], "mean_score: ") with tf.control_dependencies([index, printing]): memory = [tf.identity(mem) for mem in memory] mean_score_summary = tf.cond( tf.greater(scores_num, 0), lambda: tf.summary.scalar("mean_score_this_iter", mean_score), str) summaries = tf.summary.merge([ mean_score_summary, tf.summary.scalar("episodes_finished_this_iter", scores_num) ]) return memory, summaries
def create_dataset_batch_queue(dataset, preprocessing_flag=True): from preprocessing import preprocessing_factory with tf.device('/cpu:0'): with tf.name_scope(FLAGS.dataset_name + '_data_provider'): provider = slim.dataset_data_provider.DatasetDataProvider( dataset, num_readers=FLAGS.num_readers, common_queue_capacity=1000 * config.batch_size, common_queue_min=700 * config.batch_size, shuffle=True) # Get for SSD network: image, labels, bboxes. [ image, full_mask, weak_mask, liver_mask, glabel, x1, x2, x3, x4, y1, y2, y3, y4 ] = provider.get([ 'image', 'full_mask', 'mask_image', 'liver_mask', 'object/label', 'object/oriented_bbox/x1', 'object/oriented_bbox/x2', 'object/oriented_bbox/x3', 'object/oriented_bbox/x4', 'object/oriented_bbox/y1', 'object/oriented_bbox/y2', 'object/oriented_bbox/y3', 'object/oriented_bbox/y4' ]) gxs = tf.transpose(tf.stack([x1, x2, x3, x4])) # shape = (N, 4) gys = tf.transpose(tf.stack([y1, y2, y3, y4])) liver_mask = tf.cast( tf.logical_or(tf.equal(liver_mask, 1), tf.equal(full_mask, 1)), tf.uint8) if FLAGS.full_annotation_flag: mask_image = full_mask else: mask_image = weak_mask image = tf.reshape(image, [512, 512, 3]) mask_image = tf.reshape(mask_image, [512, 512, 1]) liver_mask = tf.reshape(liver_mask, [512, 512, 1]) mask_image = tf.concat([mask_image, mask_image, mask_image], axis=-1) liver_mask = tf.concat([liver_mask, liver_mask, liver_mask], axis=-1) image = tf.identity(image, 'input_image') image = tf.expand_dims(image[:, :, 1], axis=2) image = tf.concat([image, image, image], axis=2) mask_image = tf.identity(mask_image, 'mask_image') liver_mask = tf.identity(liver_mask, 'liver_mask') liver_mask = tf.cast(liver_mask, tf.uint8) mask_image = tf.cast(mask_image, tf.uint8) _, pixel_cls_weight = \ tf_cal_gt_for_single_image(gxs, gys, glabel) print('Image: ', image) print('MaskImage: ', mask_image) print('LiverMask: ', liver_mask) print('pixel_cls_weights is ', pixel_cls_weight) pixel_cls_weight = tf.expand_dims(pixel_cls_weight, axis=2) image, mask_image, liver_mask, pixel_cls_weight = preprocessing_factory.get_preprocessing( is_training=True, method='segmentation_with_weight')(image, mask_image, liver_mask, pixel_cls_weight) from preprocessing import tf_image # liver_mask = tf_image.resize_image(liver_mask, config.train_image_shape, # method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, # align_corners=False) image = tf.identity(image, 'processed_image') mask_image = tf.identity(mask_image, 'processed_mask_image') liver_mask = tf.identity(liver_mask, 'processed_mask_image') pixel_cls_weight = tf.identity(pixel_cls_weight, 'processed_pixel_weight') # batch them with tf.name_scope(FLAGS.dataset_name + '_batch'): b_image, b_mask_image, b_liver_mask, b_pixel_cls_weight = \ tf.train.batch( [image, mask_image, liver_mask, pixel_cls_weight], batch_size=config.batch_size_per_gpu, num_threads=FLAGS.num_preprocessing_threads, capacity=500) with tf.name_scope(FLAGS.dataset_name + '_prefetch_queue'): batch_queue = slim.prefetch_queue.prefetch_queue( [b_image, b_mask_image, b_liver_mask, b_pixel_cls_weight], capacity=50) return batch_queue
def construct_network(self): self.word_ids = tf.placeholder(tf.int32, [None, None], name="word_ids") self.char_ids = tf.placeholder(tf.int32, [None, None, None], name="char_ids") self.sentence_lengths = tf.placeholder(tf.int32, [None], name="sentence_lengths") self.word_lengths = tf.placeholder(tf.int32, [None, None], name="word_lengths") self.label_ids = tf.placeholder(tf.int32, [None, None], name="label_ids") self.learningrate = tf.placeholder(tf.float32, name="learningrate") self.is_training = tf.placeholder(tf.int32, name="is_training") self.loss = 0.0 input_tensor = None input_vector_size = 0 self.initializer = None if self.config["initializer"] == "normal": self.initializer = tf.random_normal_initializer(mean=0.0, stddev=0.1) elif self.config["initializer"] == "glorot": self.initializer = tf.glorot_uniform_initializer() elif self.config["initializer"] == "xavier": self.initializer = tf.glorot_normal_initializer() else: raise ValueError("Unknown initializer") self.word_embeddings = tf.get_variable( "word_embeddings", shape=[len(self.word2id), self.config["word_embedding_size"]], initializer=(tf.zeros_initializer() if self.config["emb_initial_zero"] == True else self.initializer), trainable=(True if self.config["train_embeddings"] == True else False)) input_tensor = tf.nn.embedding_lookup(self.word_embeddings, self.word_ids) input_vector_size = self.config["word_embedding_size"] if self.config["char_embedding_size"] > 0 and self.config[ "char_recurrent_size"] > 0: with tf.variable_scope("chars"), tf.control_dependencies([ tf.assert_equal(tf.shape(self.char_ids)[2], tf.reduce_max(self.word_lengths), message="Char dimensions don't match") ]): self.char_embeddings = tf.get_variable( "char_embeddings", shape=[ len(self.char2id), self.config["char_embedding_size"] ], initializer=self.initializer, trainable=True) char_input_tensor = tf.nn.embedding_lookup( self.char_embeddings, self.char_ids) s = tf.shape(char_input_tensor) char_input_tensor = tf.reshape( char_input_tensor, shape=[ s[0] * s[1], s[2], self.config["char_embedding_size"] ]) _word_lengths = tf.reshape(self.word_lengths, shape=[s[0] * s[1]]) char_lstm_cell_fw = tf.nn.rnn_cell.LSTMCell( self.config["char_recurrent_size"], use_peepholes=self.config["lstm_use_peepholes"], state_is_tuple=True, initializer=self.initializer, reuse=False) char_lstm_cell_bw = tf.nn.rnn_cell.LSTMCell( self.config["char_recurrent_size"], use_peepholes=self.config["lstm_use_peepholes"], state_is_tuple=True, initializer=self.initializer, reuse=False) char_lstm_outputs = tf.nn.bidirectional_dynamic_rnn( char_lstm_cell_fw, char_lstm_cell_bw, char_input_tensor, sequence_length=_word_lengths, dtype=tf.float32, time_major=False) _, ((_, char_output_fw), (_, char_output_bw)) = char_lstm_outputs char_output_tensor = tf.concat( [char_output_fw, char_output_bw], axis=-1) char_output_tensor = tf.reshape( char_output_tensor, shape=[s[0], s[1], 2 * self.config["char_recurrent_size"]]) char_output_vector_size = 2 * self.config["char_recurrent_size"] if self.config["lmcost_char_gamma"] > 0.0: self.loss += self.config[ "lmcost_char_gamma"] * self.construct_lmcost( char_output_tensor, char_output_tensor, self.sentence_lengths, self.word_ids, "separate", "lmcost_char_separate") if self.config["lmcost_joint_char_gamma"] > 0.0: self.loss += self.config[ "lmcost_joint_char_gamma"] * self.construct_lmcost( char_output_tensor, char_output_tensor, self.sentence_lengths, self.word_ids, "joint", "lmcost_char_joint") if self.config["char_hidden_layer_size"] > 0: 
char_hidden_layer_size = self.config[ "word_embedding_size"] if self.config[ "char_integration_method"] == "attention" else self.config[ "char_hidden_layer_size"] char_output_tensor = tf.layers.dense( char_output_tensor, char_hidden_layer_size, activation=tf.tanh, kernel_initializer=self.initializer) char_output_vector_size = char_hidden_layer_size if self.config["char_integration_method"] == "concat": input_tensor = tf.concat( [input_tensor, char_output_tensor], axis=-1) input_vector_size += char_output_vector_size elif self.config["char_integration_method"] == "attention": assert ( char_output_vector_size == self.config["word_embedding_size"] ), "This method requires the char representation to have the same size as word embeddings" static_input_tensor = tf.stop_gradient(input_tensor) is_unk = tf.equal(self.word_ids, self.word2id[self.UNK]) char_output_tensor_normalised = tf.nn.l2_normalize( char_output_tensor, 2) static_input_tensor_normalised = tf.nn.l2_normalize( static_input_tensor, 2) cosine_cost = 1.0 - tf.reduce_sum(tf.multiply( char_output_tensor_normalised, static_input_tensor_normalised), axis=2) is_padding = tf.logical_not( tf.sequence_mask(self.sentence_lengths, maxlen=tf.shape(self.word_ids)[1])) cosine_cost_unk = tf.where(tf.logical_or( is_unk, is_padding), x=tf.zeros_like(cosine_cost), y=cosine_cost) self.loss += self.config[ "char_attention_cosine_cost"] * tf.reduce_sum( cosine_cost_unk) attention_evidence_tensor = tf.concat( [input_tensor, char_output_tensor], axis=2) attention_output = tf.layers.dense( attention_evidence_tensor, self.config["word_embedding_size"], activation=tf.tanh, kernel_initializer=self.initializer) attention_output = tf.layers.dense( attention_output, self.config["word_embedding_size"], activation=tf.sigmoid, kernel_initializer=self.initializer) input_tensor = tf.multiply( input_tensor, attention_output) + tf.multiply( char_output_tensor, (1.0 - attention_output)) elif self.config["char_integration_method"] == "none": input_tensor = input_tensor else: raise ValueError("Unknown char integration method") dropout_input = self.config["dropout_input"] * tf.cast( self.is_training, tf.float32) + ( 1.0 - tf.cast(self.is_training, tf.float32)) input_tensor = tf.nn.dropout(input_tensor, dropout_input, name="dropout_word") word_lstm_cell_fw = tf.nn.rnn_cell.LSTMCell( self.config["word_recurrent_size"], use_peepholes=self.config["lstm_use_peepholes"], state_is_tuple=True, initializer=self.initializer, reuse=False) word_lstm_cell_bw = tf.nn.rnn_cell.LSTMCell( self.config["word_recurrent_size"], use_peepholes=self.config["lstm_use_peepholes"], state_is_tuple=True, initializer=self.initializer, reuse=False) with tf.control_dependencies([ tf.assert_equal(tf.shape(self.word_ids)[1], tf.reduce_max(self.sentence_lengths), message="Sentence dimensions don't match") ]): (lstm_outputs_fw, lstm_outputs_bw), _ = tf.nn.bidirectional_dynamic_rnn( word_lstm_cell_fw, word_lstm_cell_bw, input_tensor, sequence_length=self.sentence_lengths, dtype=tf.float32, time_major=False) dropout_word_lstm = self.config["dropout_word_lstm"] * tf.cast( self.is_training, tf.float32) + ( 1.0 - tf.cast(self.is_training, tf.float32)) lstm_outputs_fw = tf.nn.dropout(lstm_outputs_fw, dropout_word_lstm) lstm_outputs_bw = tf.nn.dropout(lstm_outputs_bw, dropout_word_lstm) if self.config["lmcost_lstm_gamma"] > 0.0: self.loss += self.config[ "lmcost_lstm_gamma"] * self.construct_lmcost( lstm_outputs_fw, lstm_outputs_bw, self.sentence_lengths, self.word_ids, "separate", "lmcost_lstm_separate") if 
self.config["lmcost_joint_lstm_gamma"] > 0.0: self.loss += self.config[ "lmcost_joint_lstm_gamma"] * self.construct_lmcost( lstm_outputs_fw, lstm_outputs_bw, self.sentence_lengths, self.word_ids, "joint", "lmcost_lstm_joint") processed_tensor = tf.concat([lstm_outputs_fw, lstm_outputs_bw], 2) processed_tensor_size = self.config["word_recurrent_size"] * 2 if self.config["hidden_layer_size"] > 0: processed_tensor = tf.layers.dense( processed_tensor, self.config["hidden_layer_size"], activation=tf.tanh, kernel_initializer=self.initializer) processed_tensor_size = self.config["hidden_layer_size"] self.scores = tf.layers.dense(processed_tensor, len(self.label2id), activation=None, kernel_initializer=self.initializer, name="output_ff") if self.config["crf_on_top"] == True: crf_num_tags = self.scores.get_shape()[2].value self.crf_transition_params = tf.get_variable( "output_crf_transitions", [crf_num_tags, crf_num_tags], initializer=self.initializer) log_likelihood, self.crf_transition_params = tf.contrib.crf.crf_log_likelihood( self.scores, self.label_ids, self.sentence_lengths, transition_params=self.crf_transition_params) self.loss += self.config["main_cost"] * tf.reduce_sum( -log_likelihood) else: self.probabilities = tf.nn.softmax(self.scores) self.predictions = tf.argmax(self.probabilities, 2) loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=self.scores, labels=self.label_ids) mask = tf.sequence_mask(self.sentence_lengths, maxlen=tf.shape(self.word_ids)[1]) loss_ = tf.boolean_mask(loss_, mask) self.loss += self.config["main_cost"] * tf.reduce_sum(loss_) self.train_op = self.construct_optimizer(self.config["opt_strategy"], self.loss, self.learningrate, self.config["clip"])
def batch_assign_confidences(target_assigner, anchors_batch, gt_box_batch, gt_class_confidences_batch, gt_weights_batch=None, unmatched_class_label=None, include_background_class=True, implicit_class_weight=1.0): """Batched assignment of classification and regression targets. This differences between batch_assign_confidences and batch_assign_targets: - 'batch_assign_targets' supports scalar (agnostic), vector (multiclass) and tensor (high-dimensional) targets. 'batch_assign_confidences' only support scalar (agnostic) and vector (multiclass) targets. - 'batch_assign_targets' assumes the input class tensor using the binary one/K-hot encoding. 'batch_assign_confidences' takes the class confidence scores as the input, where 1 means positive classes, 0 means implicit negative classes, and -1 means explicit negative classes. - 'batch_assign_confidences' assigns the targets in the similar way as 'batch_assign_targets' except that it gives different weights for implicit and explicit classes. This allows user to control the negative gradients pushed differently for implicit and explicit examples during the training. Args: target_assigner: a target assigner. anchors_batch: BoxList representing N box anchors or list of BoxList objects with length batch_size representing anchor sets. gt_box_batch: a list of BoxList objects with length batch_size representing groundtruth boxes for each image in the batch gt_class_confidences_batch: a list of tensors with length batch_size, where each tensor has shape [num_gt_boxes_i, classification_target_size] and num_gt_boxes_i is the number of boxes in the ith boxlist of gt_box_batch. Note that in this tensor, 1 means explicit positive class, -1 means explicit negative class, and 0 means implicit negative class. gt_weights_batch: A list of 1-D tf.float32 tensors of shape [num_gt_boxes_i] containing weights for groundtruth boxes. unmatched_class_label: a float32 tensor with shape [d_1, d_2, ..., d_k] which is consistent with the classification target for each anchor (and can be empty for scalar targets). This shape must thus be compatible with the groundtruth labels that are passed to the "assign" function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). include_background_class: whether or not gt_class_confidences_batch includes the background class. implicit_class_weight: the weight assigned to implicit examples. Returns: batch_cls_targets: a tensor with shape [batch_size, num_anchors, num_classes], batch_cls_weights: a tensor with shape [batch_size, num_anchors, num_classes], batch_reg_targets: a tensor with shape [batch_size, num_anchors, box_code_dimension] batch_reg_weights: a tensor with shape [batch_size, num_anchors], match_list: a list of matcher.Match objects encoding the match between anchors and groundtruth boxes for each image of the batch, with rows of the Match objects corresponding to groundtruth boxes and columns corresponding to anchors. Raises: ValueError: if input list lengths are inconsistent, i.e., batch_size == len(gt_box_batch) == len(gt_class_targets_batch) and batch_size == len(anchors_batch) unless anchors_batch is a single BoxList, or if any element in gt_class_confidences_batch has rank > 2. 
""" if not isinstance(anchors_batch, list): anchors_batch = len(gt_box_batch) * [anchors_batch] if not all( isinstance(anchors, box_list.BoxList) for anchors in anchors_batch): raise ValueError( 'anchors_batch must be a BoxList or list of BoxLists.') if not (len(anchors_batch) == len(gt_box_batch) == len(gt_class_confidences_batch)): raise ValueError( 'batch size incompatible with lengths of anchors_batch, ' 'gt_box_batch and gt_class_confidences_batch.') cls_targets_list = [] cls_weights_list = [] reg_targets_list = [] reg_weights_list = [] match_list = [] if gt_weights_batch is None: gt_weights_batch = [None] * len(gt_class_confidences_batch) for anchors, gt_boxes, gt_class_confidences, gt_weights in zip( anchors_batch, gt_box_batch, gt_class_confidences_batch, gt_weights_batch): if (gt_class_confidences is not None and len(gt_class_confidences.get_shape().as_list()) > 2): raise ValueError( 'The shape of the class target is not supported. ', gt_class_confidences.get_shape()) cls_targets, _, reg_targets, _, match = target_assigner.assign( anchors, gt_boxes, gt_class_confidences, unmatched_class_label, groundtruth_weights=gt_weights) if include_background_class: cls_targets_without_background = tf.slice(cls_targets, [0, 1], [-1, -1]) else: cls_targets_without_background = cls_targets positive_mask = tf.greater(cls_targets_without_background, 0.0) negative_mask = tf.less(cls_targets_without_background, 0.0) explicit_example_mask = tf.logical_or(positive_mask, negative_mask) positive_anchors = tf.reduce_any(positive_mask, axis=-1) regression_weights = tf.to_float(positive_anchors) regression_targets = (reg_targets * tf.expand_dims(regression_weights, axis=-1)) regression_weights_expanded = tf.expand_dims(regression_weights, axis=-1) cls_targets_without_background = (cls_targets_without_background * (1 - tf.to_float(negative_mask))) cls_weights_without_background = ( (1 - implicit_class_weight) * tf.to_float(explicit_example_mask) + implicit_class_weight) if include_background_class: cls_weights_background = ( (1 - implicit_class_weight) * regression_weights_expanded + implicit_class_weight) classification_weights = tf.concat( [cls_weights_background, cls_weights_without_background], axis=-1) cls_targets_background = 1 - regression_weights_expanded classification_targets = tf.concat( [cls_targets_background, cls_targets_without_background], axis=-1) else: classification_targets = cls_targets_without_background classification_weights = cls_weights_without_background cls_targets_list.append(classification_targets) cls_weights_list.append(classification_weights) reg_targets_list.append(regression_targets) reg_weights_list.append(regression_weights) match_list.append(match) batch_cls_targets = tf.stack(cls_targets_list) batch_cls_weights = tf.stack(cls_weights_list) batch_reg_targets = tf.stack(reg_targets_list) batch_reg_weights = tf.stack(reg_weights_list) return (batch_cls_targets, batch_cls_weights, batch_reg_targets, batch_reg_weights, match_list)
def _generate_panoptic_masks(self, boxes, scores, classes, detections_masks, segmentation_mask): """Generates panoptic masks for a single image. This function implements the following steps to merge instance and semantic segmentation masks described in https://arxiv.org/pdf/1901.02446.pdf Steps: 1. resolving overlaps between different instances based on their confidence scores 2. resolving overlaps between instance and semantic segmentation outputs in favor of instances 3. removing any stuff regions labeled other or under a given area threshold. Args: boxes: A `tf.Tensor` of shape [num_rois, 4], representing the bounding boxes for detected objects. scores: A `tf.Tensor` of shape [num_rois], representing the confidence scores for each object. classes: A `tf.Tensor` of shape [num_rois], representing the class for each object. detections_masks: A `tf.Tensor` of shape [num_rois, mask_height, mask_width, 1], representing the cropped mask for each object. segmentation_mask: A `tf.Tensor` of shape [height, width], representing the semantic segmentation output. Returns: Dict with the following keys: - category_mask: A `tf.Tensor` for category masks. - instance_mask: A `tf.Tensor for instance masks. """ # Offset stuff class predictions segmentation_mask = tf.where( tf.logical_or( tf.equal(segmentation_mask, self._things_class_label), tf.equal(segmentation_mask, self._void_class_label)), segmentation_mask, segmentation_mask + self._stuff_classes_offset) # sort instances by their scores sorted_indices = tf.argsort(scores, direction='DESCENDING') mask_shape = self._output_size + [1] category_mask = tf.ones(mask_shape, dtype=tf.float32) * self._void_class_label instance_mask = tf.ones(mask_shape, dtype=tf.float32) * self._void_instance_id # filter instances with low confidence sorted_scores = tf.sort(scores, direction='DESCENDING') valid_indices = tf.where(sorted_scores > self._score_threshold) # if no instance has sufficient confidence score, skip merging # instance segmentation masks if tf.shape(valid_indices)[0] > 0: loop_end_idx = valid_indices[-1, 0] + 1 loop_end_idx = tf.minimum(tf.cast(loop_end_idx, dtype=tf.int32), self._max_num_detections) pasted_masks = self._paste_masks_fn( (detections_masks[:loop_end_idx], boxes[:loop_end_idx])) # add things segmentation to panoptic masks for i in range(loop_end_idx): # we process instances in decending order, which will make sure # the overlaps are resolved based on confidence score instance_idx = sorted_indices[i] pasted_mask = pasted_masks[instance_idx] class_id = tf.cast(classes[instance_idx], dtype=tf.float32) # convert sigmoid scores to binary values binary_mask = tf.greater(pasted_mask, self._mask_binarize_threshold) # filter empty instance masks if not tf.reduce_sum(tf.cast(binary_mask, tf.float32)) > 0: continue overlap = tf.logical_and( binary_mask, tf.not_equal(category_mask, self._void_class_label)) binary_mask_area = tf.reduce_sum( tf.cast(binary_mask, dtype=tf.float32)) overlap_area = tf.reduce_sum(tf.cast(overlap, dtype=tf.float32)) # skip instance that have a big enough overlap with instances with # higer scores if overlap_area / binary_mask_area > self._things_overlap_threshold: continue # fill empty regions in category_mask represented by # void_class_label with class_id of the instance. 
category_mask = tf.where( tf.logical_and( binary_mask, tf.equal(category_mask, self._void_class_label)), tf.ones_like(category_mask) * class_id, category_mask) # fill empty regions in the instance_mask represented by # void_instance_id with the id of the instance, starting from 1 instance_mask = tf.where( tf.logical_and( binary_mask, tf.equal(instance_mask, self._void_instance_id)), tf.ones_like(instance_mask) * tf.cast(instance_idx + 1, tf.float32), instance_mask) stuff_class_ids = tf.unique(tf.reshape(segmentation_mask, [-1])).y for stuff_class_id in stuff_class_ids: if stuff_class_id == self._things_class_label: continue stuff_mask = tf.logical_and( tf.equal(segmentation_mask, stuff_class_id), tf.equal(category_mask, self._void_class_label)) stuff_mask_area = tf.reduce_sum( tf.cast(stuff_mask, dtype=tf.float32)) if stuff_mask_area < self._stuff_area_threshold: continue category_mask = tf.where( stuff_mask, tf.ones_like(category_mask) * stuff_class_id, category_mask) results = { 'category_mask': category_mask[:, :, 0], 'instance_mask': instance_mask[:, :, 0] } return results
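The merging rule above lets an instance claim only pixels that are still void in the running category mask. A minimal sketch of that single step, assuming TensorFlow 2.x eager execution and a toy 4x4 mask (values are illustrative, not from the original code):

import tensorflow as tf

VOID = -1.0
category_mask = tf.constant([[VOID, VOID, 7.0, 7.0],
                             [VOID, VOID, 7.0, 7.0],
                             [VOID, VOID, VOID, VOID],
                             [VOID, VOID, VOID, VOID]])
binary_mask = tf.constant([[True, True, True, False],
                           [True, True, True, False],
                           [False, False, False, False],
                           [False, False, False, False]])
class_id = 3.0

# Only pixels that are both inside the instance and still unassigned are filled.
free_pixels = tf.logical_and(binary_mask, tf.equal(category_mask, VOID))
category_mask = tf.where(free_pixels, tf.ones_like(category_mask) * class_id, category_mask)
print(category_mask.numpy())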
def xdet_model_fn(features, labels, mode, params): """Our model_fn for ResNet to be used with our Estimator.""" num_anchors_list = labels['num_anchors_list'] num_feature_layers = len(num_anchors_list) shape = labels['targets'][-1] if mode != tf.estimator.ModeKeys.TRAIN: org_image = labels['targets'][-2] isdifficult = labels['targets'][-3] bbox_img = labels['targets'][-4] gbboxes_raw = labels['targets'][-5] glabels_raw = labels['targets'][-6] glabels = labels['targets'][:num_feature_layers][0] gtargets = labels['targets'][num_feature_layers:2 * num_feature_layers][0] gscores = labels['targets'][2 * num_feature_layers:3 * num_feature_layers][0] with tf.variable_scope(params['model_scope'], default_name=None, values=[features], reuse=tf.AUTO_REUSE): backbone = xdet_body_v2.xdet_resnet_v2(params['resnet_size'], params['data_format']) body_cls_output, body_regress_output = backbone( inputs=features, is_training=(mode == tf.estimator.ModeKeys.TRAIN)) cls_pred, location_pred = xdet_body_v2.xdet_head( body_cls_output, body_regress_output, params['num_classes'], num_anchors_list[0], (mode == tf.estimator.ModeKeys.TRAIN), data_format=params['data_format']) if params['data_format'] == 'channels_first': cls_pred = tf.transpose(cls_pred, [0, 2, 3, 1]) location_pred = tf.transpose(location_pred, [0, 2, 3, 1]) #org_image = tf.transpose(org_image, [0, 2, 3, 1]) # batch size is 1 shape = tf.squeeze(shape, axis=0) glabels = tf.squeeze(glabels, axis=0) gtargets = tf.squeeze(gtargets, axis=0) gscores = tf.squeeze(gscores, axis=0) cls_pred = tf.squeeze(cls_pred, axis=0) location_pred = tf.squeeze(location_pred, axis=0) if mode != tf.estimator.ModeKeys.TRAIN: org_image = tf.squeeze(org_image, axis=0) isdifficult = tf.squeeze(isdifficult, axis=0) gbboxes_raw = tf.squeeze(gbboxes_raw, axis=0) glabels_raw = tf.squeeze(glabels_raw, axis=0) bbox_img = tf.squeeze(bbox_img, axis=0) bboxes_pred = labels['decode_fn']( location_pred ) #(tf.reshape(location_pred, location_pred.get_shape().as_list()[:-1] + [-1, 4]))#(location_pred)# eval_ops, save_image_op = bboxes_eval(org_image, shape, bbox_img, cls_pred, bboxes_pred, glabels_raw, gbboxes_raw, isdifficult, params['num_classes']) _ = tf.identity(save_image_op, name='save_image_with_bboxes_op') cls_pred = tf.reshape(cls_pred, [-1, params['num_classes']]) location_pred = tf.reshape(location_pred, [-1, 4]) glabels = tf.reshape(glabels, [-1]) gscores = tf.reshape(gscores, [-1]) gtargets = tf.reshape(gtargets, [-1, 4]) # raw mask for positive > 0.5, and for negetive < 0.3 # each positive examples has one label positive_mask = glabels > 0 #tf.logical_and(glabels > 0, gscores > params['match_threshold']) fpositive_mask = tf.cast(positive_mask, tf.float32) n_positives = tf.reduce_sum(fpositive_mask) # negtive examples are those max_overlap is still lower than neg_threshold, note that some positive may also has lower jaccard # note those gscores is 0 is either be ignored during anchors encode or anchors have 0 overlap with all ground truth #negtive_mask = tf.logical_and(tf.logical_and(tf.logical_not(tf.logical_or(positive_mask, glabels < 0)), gscores < params['neg_threshold']), gscores > 0.) negtive_mask = tf.logical_and(tf.equal(glabels, 0), gscores > 0.) #negtive_mask = tf.logical_and(tf.logical_and(tf.logical_not(positive_mask), gscores < params['neg_threshold']), gscores > 0.) 
#negtive_mask = tf.logical_and(gscores < params['neg_threshold'], tf.logical_not(positive_mask)) fnegtive_mask = tf.cast(negtive_mask, tf.float32) n_negtives = tf.reduce_sum(fnegtive_mask) n_neg_to_select = tf.cast(params['negative_ratio'] * n_positives, tf.int32) n_neg_to_select = tf.minimum(n_neg_to_select, tf.cast(n_negtives, tf.int32)) # hard negative mining for classification predictions_for_bg = tf.nn.softmax(cls_pred)[:, 0] prob_for_negtives = tf.where( negtive_mask, 0. - predictions_for_bg, # ignore all the positives 0. - tf.ones_like(predictions_for_bg)) topk_prob_for_bg, _ = tf.nn.top_k(prob_for_negtives, k=n_neg_to_select) selected_neg_mask = prob_for_negtives > topk_prob_for_bg[-1] # # random select negtive examples for classification # selected_neg_mask = tf.random_uniform(tf.shape(gscores), minval=0, maxval=1.) < tf.where( # tf.greater(n_negtives, 0), # tf.divide(tf.cast(n_neg_to_select, tf.float32), n_negtives), # tf.zeros_like(tf.cast(n_neg_to_select, tf.float32)), # name='rand_select_negtive') # include both selected negtive and all positive examples final_mask = tf.stop_gradient( tf.logical_or(tf.logical_and(negtive_mask, selected_neg_mask), positive_mask)) total_examples = tf.reduce_sum(tf.cast(final_mask, tf.float32)) # add mask for glabels and cls_pred here glabels = tf.boolean_mask(tf.clip_by_value(glabels, 0, FLAGS.num_classes), tf.stop_gradient(final_mask)) cls_pred = tf.boolean_mask(cls_pred, tf.stop_gradient(final_mask)) location_pred = tf.boolean_mask(location_pred, tf.stop_gradient(positive_mask)) gtargets = tf.boolean_mask(gtargets, tf.stop_gradient(positive_mask)) # Calculate loss, which includes softmax cross entropy and L2 regularization. cross_entropy = tf.cond( n_positives > 0., lambda: tf.losses.sparse_softmax_cross_entropy( labels=glabels, logits=cls_pred), lambda: 0.) #cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=glabels, logits=cls_pred) # Create a tensor named cross_entropy for logging purposes. tf.identity(cross_entropy, name='cross_entropy_loss') tf.summary.scalar('cross_entropy_loss', cross_entropy) loc_loss = tf.cond( n_positives > 0., lambda: modified_smooth_l1( location_pred, tf.stop_gradient(gtargets), sigma=1.), lambda: tf.zeros_like(location_pred)) #loc_loss = modified_smooth_l1(location_pred, tf.stop_gradient(gtargets)) loc_loss = tf.reduce_mean(tf.reduce_sum(loc_loss, axis=-1)) loc_loss = tf.identity(loc_loss, name='location_loss') tf.summary.scalar('location_loss', loc_loss) tf.losses.add_loss(loc_loss) with tf.control_dependencies([save_image_op]): # Add weight decay to the loss. We exclude the batch norm variables because # doing so leads to a small improvement in accuracy. loss = 1.2 * (cross_entropy + loc_loss) + params['weight_decay'] * tf.add_n([ tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'batch_normalization' not in v.name ]) total_loss = tf.identity(loss, name='total_loss') predictions = { 'classes': tf.argmax(cls_pred, axis=-1), 'probabilities': tf.reduce_max(tf.nn.softmax(cls_pred, name='softmax_tensor'), axis=-1), 'bboxes_predict': tf.reshape(bboxes_pred, [-1, 4]), 'saved_image_index': save_image_op } summary_hook = tf.train.SummarySaverHook( save_secs=FLAGS.save_summary_steps, output_dir=FLAGS.model_dir, summary_op=tf.summary.merge_all()) if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec( mode=mode, predictions=predictions, evaluation_hooks=[summary_hook], loss=loss, eval_metric_ops=eval_ops) #=eval_ops) else: raise ValueError('This script only support predict mode!')
def ssd_model_fn(features, labels, mode, params): """model_fn for SSD to be used with our Estimator.""" shape = labels['shape'] loc_targets = labels['loc_targets'] cls_targets = labels['cls_targets'] match_scores = labels['match_scores'] global global_anchor_info decode_fn = global_anchor_info['decode_fn'] num_anchors_per_layer = global_anchor_info['num_anchors_per_layer'] all_num_anchors_depth = global_anchor_info['all_num_anchors_depth'] # bboxes_pred = decode_fn(loc_targets[0]) # bboxes_pred = [tf.reshape(preds, [-1, 4]) for preds in bboxes_pred] # bboxes_pred = tf.concat(bboxes_pred, axis=0) # save_image_op = tf.py_func(save_image_with_bbox, # [ssd_preprocessing.unwhiten_image(features[0]), # tf.clip_by_value(cls_targets[0], 0, tf.int64.max), # match_scores[0], # bboxes_pred], # tf.int64, stateful=True) # with tf.control_dependencies([save_image_op]): #print(all_num_anchors_depth) with tf.variable_scope(params['model_scope'], default_name=None, values=[features], reuse=tf.AUTO_REUSE): backbone = ssd_net.VGG16Backbone(params['data_format']) feature_layers = backbone.forward( features, training=(mode == tf.estimator.ModeKeys.TRAIN)) #print(feature_layers) #location_pred:[[batch_size, 4, 38, 38],[]] #cls_pred:[[batch_size, num_classes, 38, 38, ]... # 10*10*6*num_classes, 5*5*6*num_classes, 3*3*4*num_classes, 1*1*4*num_classes] location_pred, cls_pred = ssd_net.multibox_head( feature_layers, params['num_classes'], all_num_anchors_depth, data_format=params['data_format']) if params['data_format'] == 'channels_first': cls_pred = [tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred] location_pred = [ tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred ] #cls_pred:[[batch_size, 38, 38, 4],[]...] #location_pred:[[batch_size, 38, 38, 1],[]...] cls_pred = [ tf.reshape(pred, [tf.shape(features)[0], -1, params['num_classes']]) for pred in cls_pred ] location_pred = [ tf.reshape(pred, [tf.shape(features)[0], -1, 4]) for pred in location_pred ] #cls_pred:[[batch_size, 38*38*4, class_num],...] #location_pred:[[batch_size, 38*38*4 , 4]...] cls_pred = tf.concat(cls_pred, axis=1) location_pred = tf.concat(location_pred, axis=1) cls_pred = tf.reshape(cls_pred, [-1, params['num_classes']]) location_pred = tf.reshape(location_pred, [-1, 4]) # cls_pred:[batch_size*(38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4), num_class] # location_pred:[batch_size*(38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4), 4] with tf.device('/cpu:0'): with tf.control_dependencies([cls_pred, location_pred]): with tf.name_scope('post_forward'): #location_pred:[batch_size, 8732, 4], containing the predicted offset for every prior_bbox #decode_fn: given the coordinates of the 8732 prior_bboxes and their corresponding offsets, the actual positions of the 8732 predicted boxes are recovered bboxes_pred = decode_fn( tf.reshape(location_pred, [tf.shape(features)[0], -1, 4])) bboxes_pred = tf.reshape(bboxes_pred, [-1, 4]) #bboxes_pred:[batch_size*8732, 4]; the 4 values are the bbox [ymin, xmin, ymax, xmax] #cls_targets:[batch_size, 8732] flaten_cls_targets = tf.reshape(cls_targets, [-1]) #[batch_size*8732] flaten_match_scores = tf.reshape(match_scores, [-1]) flaten_loc_targets = tf.reshape(loc_targets, [-1, 4]) #[batch_size*8732, 4] # each positive example has one label positive_mask = flaten_cls_targets > 0 n_positives = tf.count_nonzero(positive_mask) #batch_n_positives:[batch_size]; the i-th entry x means the i-th image has x positive prior_bboxes batch_n_positives = tf.count_nonzero(cls_targets > 0, -1) #batch_negative_mask:[batch_size, 8732]. 
batch_negtive_mask = tf.equal(cls_targets, 0) #batch_n_negtives:[batch_size]; the i-th entry x means the i-th image has x negative prior_bboxes batch_n_negtives = tf.count_nonzero(batch_negtive_mask, -1) #negative_ratio: 3, i.e. up to three times as many negatives as positives are kept batch_n_neg_select = tf.to_int32( params['negative_ratio'] * tf.to_float(batch_n_positives)) batch_n_neg_select = tf.minimum(batch_n_neg_select, tf.to_int32(batch_n_negtives)) #batch_n_neg_select:[batch_size] -> the i-th entry x means x negative prior_bboxes are selected for the i-th image # hard negative mining for classification # predictions_for_bg:[batch_size, 8732] predictions_for_bg = tf.nn.softmax( tf.reshape( cls_pred, [tf.shape(features)[0], -1, params['num_classes'] ]))[:, :, 0] prob_for_negtives = tf.where( batch_negtive_mask, 0. - predictions_for_bg, # ignore all the positives 0. - tf.ones_like(predictions_for_bg)) #prob_for_negtives:[batch_size, 8732]; where a prior_bbox has class label 0 its (negated) background probability is filled in, otherwise -1 #topk_prob_for_bg:[batch_size, 8732], with the second dimension sorted from largest to smallest topk_prob_for_bg, _ = tf.nn.top_k( prob_for_negtives, k=tf.shape(prob_for_negtives)[1]) #score_at_k:[batch_size]; the i-th entry x means: m negative prior_bboxes are selected for the i-th image, and among these m boxes the highest background probability is -x. #In other words, the lowest score corresponds to the worst prediction: the box is background, yet its predicted background probability (-x) is very low. (The sign is negated above, when building prob_for_negtives, to make the sorting convenient.) score_at_k = tf.gather_nd( topk_prob_for_bg, tf.stack([ tf.range(tf.shape(features)[0]), batch_n_neg_select - 1 ], axis=-1)) #selected_neg_mask:[batch_size, 8732]; positions of the selected negatives are True, all others are False selected_neg_mask = prob_for_negtives >= tf.expand_dims( score_at_k, axis=-1) # include both selected negative and all positive examples # final_mask:[batch_size, 8732]; positions of the selected positives and negatives are True, the rest are False final_mask = tf.stop_gradient( tf.logical_or( tf.reshape( tf.logical_and(batch_negtive_mask, selected_neg_mask), [-1]), positive_mask)) total_examples = tf.count_nonzero(final_mask) #suppose the batch of images contains m positives and n negatives in total #cls_pred:[m+n] cls_pred = tf.boolean_mask(cls_pred, final_mask) #location_pred:[m, 4] location_pred = tf.boolean_mask( location_pred, tf.stop_gradient(positive_mask)) flaten_cls_targets = tf.boolean_mask( tf.clip_by_value(flaten_cls_targets, 0, params['num_classes']), final_mask) flaten_loc_targets = tf.stop_gradient( tf.boolean_mask(flaten_loc_targets, positive_mask)) predictions = { 'classes': tf.argmax(cls_pred, axis=-1), 'probabilities': tf.reduce_max(tf.nn.softmax(cls_pred, name='softmax_tensor'), axis=-1), 'loc_predict': bboxes_pred } cls_accuracy = tf.metrics.accuracy(flaten_cls_targets, predictions['classes']) metrics = {'cls_accuracy': cls_accuracy} # Create a tensor named cls_accuracy for logging purposes. tf.identity(cls_accuracy[1], name='cls_accuracy') tf.summary.scalar('cls_accuracy', cls_accuracy[1]) if mode == tf.estimator.ModeKeys.PREDICT: return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) # Calculate loss, which includes softmax cross entropy and L2 regularization. #cross_entropy = tf.cond(n_positives > 0, lambda: tf.losses.sparse_softmax_cross_entropy(labels=flaten_cls_targets, logits=cls_pred), lambda: 0.)# * (params['negative_ratio'] + 1.) #flaten_cls_targets=tf.Print(flaten_cls_targets, [flaten_loc_targets],summarize=50000) cross_entropy = tf.losses.sparse_softmax_cross_entropy( labels=flaten_cls_targets, logits=cls_pred) * (params['negative_ratio'] + 1.) # Create a tensor named cross_entropy for logging purposes. 
tf.identity(cross_entropy, name='cross_entropy_loss') tf.summary.scalar('cross_entropy_loss', cross_entropy) #loc_loss = tf.cond(n_positives > 0, lambda: modified_smooth_l1(location_pred, tf.stop_gradient(flaten_loc_targets), sigma=1.), lambda: tf.zeros_like(location_pred)) loc_loss = modified_smooth_l1(location_pred, flaten_loc_targets, sigma=1.) #loc_loss = modified_smooth_l1(location_pred, tf.stop_gradient(gtargets)) loc_loss = tf.reduce_mean(tf.reduce_sum(loc_loss, axis=-1), name='location_loss') tf.summary.scalar('location_loss', loc_loss) tf.losses.add_loss(loc_loss) l2_loss_vars = [] for trainable_var in tf.trainable_variables(): if '_bn' not in trainable_var.name: if 'conv4_3_scale' not in trainable_var.name: l2_loss_vars.append(tf.nn.l2_loss(trainable_var)) else: l2_loss_vars.append(tf.nn.l2_loss(trainable_var) * 0.1) # Add weight decay to the loss. We exclude the batch norm variables because # doing so leads to a small improvement in accuracy. total_loss = tf.add(cross_entropy + loc_loss, tf.multiply(params['weight_decay'], tf.add_n(l2_loss_vars), name='l2_loss'), name='total_loss') if mode == tf.estimator.ModeKeys.TRAIN: global_step = tf.train.get_or_create_global_step() lr_values = [ params['learning_rate'] * decay for decay in params['lr_decay_factors'] ] learning_rate = tf.train.piecewise_constant( tf.cast(global_step, tf.int32), [int(_) for _ in params['decay_boundaries']], lr_values) truncated_learning_rate = tf.maximum(learning_rate, tf.constant( params['end_learning_rate'], dtype=learning_rate.dtype), name='learning_rate') # Create a tensor named learning_rate for logging purposes. tf.summary.scalar('learning_rate', truncated_learning_rate) optimizer = tf.train.MomentumOptimizer( learning_rate=truncated_learning_rate, momentum=params['momentum']) optimizer = tf.contrib.estimator.TowerOptimizer(optimizer) # Batch norm requires update_ops to be added as a train_op dependency. update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): train_op = optimizer.minimize(total_loss, global_step) else: train_op = None return tf.estimator.EstimatorSpec( mode=mode, predictions=predictions, loss=total_loss, train_op=train_op, eval_metric_ops=metrics, scaffold=tf.train.Scaffold(init_fn=get_init_fn()))
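The per-image hard negative mining used in ssd_model_fn can be reduced to a few lines. A minimal sketch, assuming TensorFlow 2.x eager execution, a toy batch of one image with six anchors, and an illustrative negatives-per-positive ratio (none of these values come from the original code):

import tensorflow as tf

cls_targets = tf.constant([[1, 0, 0, 0, 2, 0]], tf.int64)    # [batch, anchors]
bg_prob = tf.constant([[0.9, 0.2, 0.8, 0.6, 0.1, 0.4]])      # P(background)
ratio = 1  # negatives selected per positive (illustrative)

negative_mask = tf.equal(cls_targets, 0)
n_pos = tf.cast(tf.math.count_nonzero(cls_targets > 0, axis=-1), tf.int32)
n_neg = tf.minimum(ratio * n_pos,
                   tf.cast(tf.math.count_nonzero(negative_mask, axis=-1), tf.int32))

# Hard negatives are the negatives the classifier is least sure are background.
neg_score = tf.where(negative_mask, -bg_prob, -tf.ones_like(bg_prob))
topk, _ = tf.math.top_k(neg_score, k=tf.shape(neg_score)[1])
batch = tf.shape(bg_prob)[0]
kth_score = tf.gather_nd(topk, tf.stack([tf.range(batch), n_neg - 1], axis=-1))
selected_neg = tf.logical_and(negative_mask, neg_score >= kth_score[:, None])

# Keep all positives plus the selected hard negatives.
final_mask = tf.logical_or(cls_targets > 0, selected_neg)
print(final_mask.numpy())  # [[ True  True False False  True  True]]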
def streaming_tp_fp_arrays(num_gbboxes, tp, fp, scores, remove_zero_scores=True, metrics_collections=None, updates_collections=None, name=None): """Streaming computation of True and False Positive arrays. This metric also keeps track of scores and the number of groundtruth objects. """ # Input dictionaries: dict outputs as streaming metrics. if isinstance(scores, dict) or isinstance(fp, dict): d_values = {} d_update_ops = {} for c in num_gbboxes.keys(): scope = 'streaming_tp_fp_%s' % c v, up = streaming_tp_fp_arrays(num_gbboxes[c], tp[c], fp[c], scores[c], remove_zero_scores, metrics_collections, updates_collections, name=scope) d_values[c] = v d_update_ops[c] = up return d_values, d_update_ops # Input Tensors... with variable_scope.variable_scope(name, 'streaming_tp_fp', [num_gbboxes, tp, fp, scores]): num_gbboxes = math_ops.to_int64(num_gbboxes) scores = math_ops.to_float(scores) stype = tf.bool tp = tf.cast(tp, stype) fp = tf.cast(fp, stype) # Reshape TP and FP tensors and clean away 0 class values. scores = tf.reshape(scores, [-1]) tp = tf.reshape(tp, [-1]) fp = tf.reshape(fp, [-1]) # Remove entries where TP and FP are both false. mask = tf.logical_or(tp, fp) if remove_zero_scores: rm_threshold = 1e-4 mask = tf.logical_and(mask, tf.greater(scores, rm_threshold)) scores = tf.boolean_mask(scores, mask) tp = tf.boolean_mask(tp, mask) fp = tf.boolean_mask(fp, mask) # Local variables accumulating information over batches. v_nobjects = _create_local('v_num_gbboxes', shape=[], dtype=tf.int64) v_ndetections = _create_local('v_num_detections', shape=[], dtype=tf.int32) v_scores = _create_local('v_scores', shape=[0, ]) v_tp = _create_local('v_tp', shape=[0, ], dtype=stype) v_fp = _create_local('v_fp', shape=[0, ], dtype=stype) # Update operations. nobjects_op = state_ops.assign_add(v_nobjects, tf.reduce_sum(num_gbboxes)) ndetections_op = state_ops.assign_add(v_ndetections, tf.size(scores, out_type=tf.int32)) scores_op = state_ops.assign(v_scores, tf.concat([v_scores, scores], axis=0), validate_shape=False) tp_op = state_ops.assign(v_tp, tf.concat([v_tp, tp], axis=0), validate_shape=False) fp_op = state_ops.assign(v_fp, tf.concat([v_fp, fp], axis=0), validate_shape=False) # Value and update ops. val = (v_nobjects, v_ndetections, v_tp, v_fp, v_scores) with ops.control_dependencies([nobjects_op, ndetections_op, scores_op, tp_op, fp_op]): update_op = (nobjects_op, ndetections_op, tp_op, fp_op, scores_op) if metrics_collections: ops.add_to_collections(metrics_collections, val) if updates_collections: ops.add_to_collections(updates_collections, update_op) return val, update_op
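The filtering logic at the heart of streaming_tp_fp_arrays keeps only detections that are recorded as either a true or a false positive and whose score is above a small threshold. A minimal sketch, assuming TensorFlow 2.x eager execution and toy values:

import tensorflow as tf

tp = tf.constant([True, False, False, True])
fp = tf.constant([False, True, False, False])
scores = tf.constant([0.9, 0.5, 0.0, 2e-5])

mask = tf.logical_or(tp, fp)                           # drop entries that are neither
mask = tf.logical_and(mask, tf.greater(scores, 1e-4))  # drop near-zero scores
print(tf.boolean_mask(scores, mask).numpy())  # [0.9 0.5]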
def _local_perm(inputs, targets, is_masked, perm_size, seq_len): """ Sample a permutation of the factorization order, and create an attention mask accordingly. Args: inputs: int64 Tensor in shape [seq_len], input ids. targets: int64 Tensor in shape [seq_len], target ids. is_masked: bool Tensor in shape [seq_len]. True means being selected for partial prediction. perm_size: the length of longest permutation. Could be set to be reuse_len. Should not be larger than reuse_len or there will be data leaks. seq_len: int, sequence length. """ # Generate permutation indices index = tf.range(seq_len, dtype=tf.int64) index = tf.transpose(tf.reshape(index, [-1, perm_size])) index = tf.random_shuffle(index) index = tf.reshape(tf.transpose(index), [-1]) # `perm_mask` and `target_mask` # non-functional tokens non_func_tokens = tf.logical_not( tf.logical_or(tf.equal(inputs, SEP_ID), tf.equal(inputs, CLS_ID))) non_mask_tokens = tf.logical_and(tf.logical_not(is_masked), non_func_tokens) masked_or_func_tokens = tf.logical_not(non_mask_tokens) # Set the permutation indices of non-masked (& non-functional) tokens to the # smallest index (-1): # (1) they can be seen by all other positions # (2) they cannot see masked positions, so there won't be an information leak smallest_index = -tf.ones([seq_len], dtype=tf.int64) rev_index = tf.where(non_mask_tokens, smallest_index, index) # Create `target_mask`: non-functional and masked tokens # 1: use mask as input and have loss # 0: use token (or [SEP], [CLS]) as input and do not have loss target_tokens = tf.logical_and(masked_or_func_tokens, non_func_tokens) target_mask = tf.cast(target_tokens, tf.float32) # Create `perm_mask` # `target_tokens` cannot see themselves self_rev_index = tf.where(target_tokens, rev_index, rev_index + 1) # 1: cannot attend if i <= j and j is not non-masked (masked_or_func_tokens) # 0: can attend if i > j or j is non-masked perm_mask = tf.logical_and(self_rev_index[:, None] <= rev_index[None, :], masked_or_func_tokens) perm_mask = tf.cast(perm_mask, tf.float32) # new target: [next token] for LM and [curr token] (self) for PLM new_targets = tf.concat([inputs[0:1], targets[:-1]], axis=0) # construct inputs_k inputs_k = inputs # construct inputs_q inputs_q = target_mask return perm_mask, new_targets, target_mask, inputs_k, inputs_q
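The token partition above separates functional tokens ([SEP]/[CLS]) and masked tokens from ordinary context tokens before the permutation mask is built. A minimal sketch of that partition, assuming TensorFlow 2.x eager execution; the SEP_ID/CLS_ID values and token ids below are made up for illustration:

import tensorflow as tf

SEP_ID, CLS_ID = 4, 3
inputs = tf.constant([3, 11, 12, 4, 13], tf.int64)
is_masked = tf.constant([False, True, False, False, False])

non_func_tokens = tf.logical_not(
    tf.logical_or(tf.equal(inputs, SEP_ID), tf.equal(inputs, CLS_ID)))
non_mask_tokens = tf.logical_and(tf.logical_not(is_masked), non_func_tokens)
# Prediction targets: masked tokens that are not [SEP]/[CLS].
target_tokens = tf.logical_and(tf.logical_not(non_mask_tokens), non_func_tokens)
print(target_tokens.numpy())  # [False  True False False False]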
def _static_subsample(self, indicator, batch_size, labels): """Returns subsampled minibatch. Args: indicator: boolean tensor of shape [N] whose True entries can be sampled. N should be a compile-time constant. batch_size: desired batch size. This scalar cannot be None. labels: boolean tensor of shape [N] denoting positive(=True) and negative (=False) examples. N should be a compile-time constant. Returns: sampled_idx_indicator: boolean tensor of shape [N], True for entries which are sampled. It ensures the length of output of the subsample is always batch_size, even when number of examples set to True in indicator is less than batch_size. Raises: ValueError: if labels and indicator are not 1D boolean tensors. """ # Check if indicator and labels have a static size. if not indicator.shape.is_fully_defined(): raise ValueError( 'indicator must be static in shape when is_static is ' 'True') if not labels.shape.is_fully_defined(): raise ValueError('labels must be static in shape when is_static is ' 'True') if not isinstance(batch_size, int): raise ValueError( 'batch_size has to be an integer when is_static is ' 'True.') input_length = tf.shape(indicator)[0] # Set the number of examples set True in indicator to be at least # batch_size. num_true_sampled = tf.reduce_sum(tf.cast(indicator, tf.float32)) additional_false_sample = tf.less_equal( tf.cumsum(tf.cast(tf.logical_not(indicator), tf.float32)), batch_size - num_true_sampled) indicator = tf.logical_or(indicator, additional_false_sample) # Shuffle indicator and label. Need to store the permutation to restore the # order post sampling. permutation = tf.random_shuffle(tf.range(input_length)) indicator = ops.matmul_gather_on_zeroth_axis( tf.cast(indicator, tf.float32), permutation) labels = ops.matmul_gather_on_zeroth_axis(tf.cast(labels, tf.float32), permutation) # index (starting from 1) when indicator is True, 0 when False indicator_idx = tf.where(tf.cast(indicator, tf.bool), tf.range(1, input_length + 1), tf.zeros(input_length, tf.int32)) # Use -1 for negative and +1 for positive labels signed_label = tf.where( tf.cast(labels, tf.bool), tf.ones(input_length, tf.int32), tf.scalar_mul(-1, tf.ones(input_length, tf.int32))) # negative of index for negative label, positive index for positive label, # 0 when indicator is False. signed_indicator_idx = tf.multiply(indicator_idx, signed_label) sorted_signed_indicator_idx = tf.nn.top_k(signed_indicator_idx, input_length, sorted=True).values [num_positive_samples, num_negative_samples ] = self._get_num_pos_neg_samples(sorted_signed_indicator_idx, batch_size) sampled_idx = self._get_values_from_start_and_end( sorted_signed_indicator_idx, num_positive_samples, num_negative_samples, batch_size) # Shift the indices to start from 0 and remove any samples that are set as # False. sampled_idx = tf.abs(sampled_idx) - tf.ones(batch_size, tf.int32) sampled_idx = tf.multiply( tf.cast(tf.greater_equal(sampled_idx, tf.constant(0)), tf.int32), sampled_idx) sampled_idx_indicator = tf.cast( tf.reduce_sum(tf.one_hot(sampled_idx, depth=input_length), axis=0), tf.bool) # project back the order based on stored permutations reprojections = tf.one_hot(permutation, depth=input_length, dtype=tf.float32) return tf.cast( tf.tensordot(tf.cast(sampled_idx_indicator, tf.float32), reprojections, axes=[0, 0]), tf.bool)
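The padding trick in _static_subsample guarantees that at least batch_size candidates are available before sampling: if too few entries are True, the earliest False entries are flipped with tf.logical_or. A minimal sketch, assuming TensorFlow 2.x eager execution and toy values:

import tensorflow as tf

indicator = tf.constant([False, True, False, False, True, False])
batch_size = 4

num_true = tf.reduce_sum(tf.cast(indicator, tf.float32))
# Flip exactly (batch_size - num_true) of the earliest False entries to True.
additional_false = tf.less_equal(
    tf.cumsum(tf.cast(tf.logical_not(indicator), tf.float32)),
    batch_size - num_true)
padded = tf.logical_or(indicator, additional_false)
print(padded.numpy())  # [ True  True  True False  True False] -> four True entries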
def run_trials(self, pol_x, val_x, stimulus, truth): # Inputs: # pol_x: Initial values for hidden units of the policy network # val_x: Initial values for hidden units of the value network # stimulus: The stimuli, # time poitns x 3 (fixation, rightward evidence, leftward evidence) # truth: Vector that has (# trials) elements, each specifying true direction for that trial ############# Progress trial over time ############## # Unstack input data across the time dimension input_data = tf.unstack(stimulus, axis=0) # Put truth in proper shape truth = tf.expand_dims(truth, axis=1) # Define tensor flow objects for fixation, so the selected action can be compared to fixate = tf.constant(np.zeros((par['batch_train_size'], 1)), dtype=np.float64) # Define left and right direction choices, so action can be compared to right_choice = tf.constant(np.ones((par['batch_train_size'], 1)), dtype=np.float64) left_choice = tf.constant(-np.ones((par['batch_train_size'], 1)), dtype=np.float64) # Array to keep history of hidden units activity (x=current, r=firing rate) of the policy network self.pol_x_history = [] self.pol_r_history = [] # Array to keep output of the policy network self.pol_out_history = [] self.pol_out_history0 = [] # Array to keep history of hidden units activity (x=current, r=firing rate) of the value Netwrok self.val_x_history = [] self.val_r_history = [] # Array to keep output of the value network self.val_out_history = [] ############# Decision and reward array initiation########## # Array to hold the actions of the agent at any point in time # 0 is fixation, 1 is saccade right, 2 is saccade left # Array to keep all the selected actions self.actions = [] self.action_array = [] # Array version of action (binary) # Array to keep track of the reward over time self.reward = [] # Array to keep track of discount applied to each reward at each time point self.discount = [] # See page 17 of Song et al., 2017 # Array to keep track of logpi, i.e. 
probability of the selected action at each point in time # This will be used to calculate J later self.logpi = [] # Initialize a time mask to keep track of when each trial has been aborted or finished self.time_mask = [ ] #tf.constant(np.ones((self.total_dur, par['batch_train_size'])), dtype=np.int32) self.ideal = [] # Initialize an array for the input to the value network that reflects the chosen action # The array keeps track of the chosen action at each time point # 3 actions are possible, hence number 3 at the third dimension # A [1 0 0] at the third dimension: fixate # A [0 1 0] at the third dimension: Go right # A [0 0 1] at the third dimension: Go left # self.action_array = tf.constant(np.zeros((self.total_dur, par['batch_train_size'], 3)), dtype=np.int32) # self.action_array = tf.constant(np.zeros((par['batch_train_size'], 3)), dtype=np.float32) temp_fixate = np.zeros((par['batch_train_size'], 3), dtype=np.bool_) temp_fixate[:, 1] = 1 fixate_array = tf.constant(temp_fixate, dtype=tf.bool) # Array to reflect choosing the right direction #temp_right = np.zeros((self.total_dur, par['batch_train_size'], 3)) #temp_right[self.t, self.b, 1] = 1 temp_right = np.zeros((par['batch_train_size'], 3), dtype=np.bool_) temp_right[:, 2] = 1 right_array = tf.constant(temp_right, dtype=tf.bool) # Array to reflect choosing the left direction #temp_left = np.zeros((self.total_dur, par['batch_train_size'], 3)) #temp_left[self.t, self.b, 2] = 1 temp_left = np.zeros((par['batch_train_size'], 3), dtype=np.bool_) temp_left[:, 0] = 1 left_array = tf.constant(temp_left, dtype=tf.bool) self.temp_l = [] self.temp_r = [] self.temp1 = [] self.temp2 = [] # Go over input at each point in time (this_u) self.t = 0 # Variable to keep track of time (in # itme points) cont_flag = tf.constant(np.ones((3, par['batch_train_size'])), dtype=tf.float64) # Continuation of trial flag for this_u in input_data: ''' 1) Policy network: Given the input and previous hidden unit activity, get activity of hidden units at next time step x is the input current to each cell and r is be firing rate ''' #pdb.set_trace() this_input = tf.multiply(cont_flag, this_u) pol_x, pol_r, other_params = pol_cell(this_input, pol_x) # Append current activity of the policy network units to their history self.pol_x_history.append(tf.transpose(pol_x)) self.pol_r_history.append(tf.transpose(pol_r)) ''' # 2) Policy network: Given the hidden state firing rate at time t, get output at time t (policy) ''' with tf.variable_scope('policy_output', reuse=True): pol_W_out = tf.get_variable('pol_W_out', dtype=tf.float64) pol_b_out = tf.get_variable('pol_b_out', dtype=tf.float64) pol_out_0 = tf.matmul(pol_W_out, pol_r) + pol_b_out # Linear part, equation 6 pol_out = tf.nn.softmax(pol_out_0, 0) # Softmax part, equation 7 #pol_out = pol_out_0 ############ Create ideal pol out to see performance of the system for checking purposes bi = 1e-20 ideal_pol_out = np.zeros((3, par['batch_train_size'])) + bi if self.t <= ( self.stim_dur + self.fix_dur ): # During fixation period, action must be 0 (i.e. 
fixation) ideal_pol_out[1, :] = 1 - 2 * bi ideal = tf.constant(ideal_pol_out, dtype=np.float64) if self.t > ( self.stim_dur + self.fix_dur ): # During decision period, action must be making a saccade to thr right direction to get a reward #pdb.set_trace() temp_l = tf.equal( truth, tf.constant(-np.ones((par['batch_train_size'], 1)), dtype=tf.float64)) temp_l = tf.transpose(temp_l) temp_l0 = np.zeros_like(ideal_pol_out) temp_l0[0, :] = 1 temp_l = tf.logical_and(tf.tile(temp_l, [3, 1]), tf.constant(temp_l0, dtype=tf.bool)) temp_r = tf.equal( truth, tf.constant(np.ones((par['batch_train_size'], 1)), dtype=tf.float64)) temp_r = tf.transpose(temp_r) temp_r0 = np.zeros_like(ideal_pol_out) temp_r0[2, :] = 1 temp_r = tf.logical_and(tf.tile(temp_r, [3, 1]), tf.constant(temp_r0, dtype=tf.bool)) ideal = tf.constant( ideal_pol_out, dtype=np.float64) + (1 - 3 * bi) * tf.cast( tf.logical_or(temp_l, temp_r), dtype=tf.float64) self.temp_l.append(temp_l) self.temp_r.append(temp_r) #pdb.set_trace() pol_out = 1 * pol_out + 0 * ideal self.ideal.append(ideal) # Append current output of the policy network to its history self.pol_out_history.append(pol_out) self.pol_out_history0.append(pol_out_0) ''' # 3) Choose action Given the output of the policy network, which specifies probabilities, choose an action ''' # The multinomial will generate a number in [0, 2] range, by subtracting it by 1, we bring it to the [-1, 1] range # The multinomial function of tensorflow requires logits, hence the log this_action = tf.multinomial( tf.log(tf.transpose(pol_out)), 1 ) - 1 # Do not remove the log!, or will produce samples not from the given distribution! this_action = tf.cast(this_action, dtype=tf.float64) # I just need to do it as an input to the value network, otherwise, having the actions vector as is for reward calculation is better self.actions.append(this_action) # 5) Given the selected action for each batch (trial), calculate the state of the system and its reward action_array = tf.constant(np.zeros((par['batch_train_size'], 3)), dtype=tf.bool) # Update the action array based on chosen actions temp1 = tf.logical_or(tf.logical_and(tf.tile(tf.equal(this_action, right_choice), [1, 3]), right_array), \ tf.logical_and(tf.tile(tf.equal(this_action, left_choice), [1, 3]), left_array)) temp2 = tf.logical_or( temp1, tf.logical_and(tf.tile(tf.equal(this_action, fixate), [1, 3]), fixate_array)) action_array = tf.logical_or(action_array, temp2) self.temp1.append(temp1) self.temp2.append(temp2) action_array = tf.cast(action_array, dtype=tf.float64) # Update action in array form self.action_array.append(action_array) # Caclaulte reward if self.t <= ( self.stim_dur + self.fix_dur ): # During fixation period, action must be 0 (i.e. fixation) # If fixatiob is kept, reward is 0 # If fixation is broken, reward is -1 (punishment) # So we just subtract 1 from the equality check this_reward = tf.cast(tf.equal(fixate, this_action), dtype=tf.float64) - 1 if self.t > ( self.stim_dur + self.fix_dur ): # During decision period, action must be making a saccade to thr right direction to get a reward # If fixation is broken, reward is 0 (no punishment) # If saccade to the correct target, reward is 1 # If saccade to the wrong target, reward is 0 (no punishment) this_reward = 1 * tf.cast( tf.equal(truth, this_action), dtype=tf.float64 ) #- 1 #*tf.cast(tf.equal(fixate, this_action), dtype=tf.float64) # Should the trial continue? 
Update the cont_flag # As long as the obtained reward is 0, the trial continues cont_flag = tf.multiply( cont_flag, tf.tile( tf.cast(tf.equal(tf.transpose(this_reward), 0), dtype=tf.float64), [3, 1])) # Update time mask if self.t == 0: # Always include time point 0 this_mask = tf.constant(np.ones((par['batch_train_size'], 1)), dtype=tf.bool) self.time_mask.append(this_mask) this_mask = tf.logical_and(this_mask, tf.equal(fixate, this_action)) else: # After time point 0 # Exclude a time point if it has already been excluded or ... # if fixation gets broken self.time_mask.append(this_mask) this_mask = tf.logical_and(this_mask, tf.equal(fixate, this_action)) # Append the current reward to the corresponding arrays self.reward.append(this_reward) # Calculate logpolicy (component in equation 3), i.e. P(selected action at each time point) #logpi = tf.multiply(pol_out, tf.cast(tf.transpose(action_array),dtype=tf.float64)) logpi = tf.multiply( pol_out, tf.cast(tf.transpose(action_array), dtype=tf.float64)) logpi = tf.log(tf.reduce_sum(logpi, axis=0)) self.logpi.append(logpi) # Update reward discount according to page 17 of Song et al., 2017 self.discount.append( np.exp(-self.t * par['dt'] / par['discount_time_constant'])) ''' 4) Value network: Given activity of policy network units and actions up to current time, calculate activity of hidden units in the value network x is the input current to each cell and r is be firing rate ''' # Concatenate the actions (stored in self.value_nput) with activity of the policy netwrok units activity_input = pol_r # prepare the activity array for concatenation action_input = tf.transpose( tf.cast(action_array, dtype=tf.float64 )) # Prepare the action array for concatenation self.value_input = tf.concat( [activity_input, action_input], axis=0) # Concatenate the two along the zeroth axis # Calculate activity of hidden unit val_x, val_r, other_params = val_cell(self.value_input, val_x) # Append current activity of the policy network units to their history self.val_x_history.append(tf.transpose(val_x)) self.val_r_history.append(tf.transpose(val_r)) ''' 5) Value network: Given the hidden state activity at time t, get output at time t (predicted reward) ''' with tf.variable_scope('value_output', reuse=True): val_W_out = tf.get_variable('val_W_out', dtype=tf.float64) val_b_out = tf.get_variable('val_b_out', dtype=tf.float64) val_out = tf.matmul(val_W_out, val_r) + val_b_out # Linear part # Append current output of the policy network to its history self.val_out_history.append(tf.squeeze(val_out)) self.t = self.t + 1 # Increment time point # Reshape the action, reward, and logpi arrays to # time points x # batches (trials) self.reward = tf.squeeze( tf.stack(self.reward) ) #tf.reshape(self.reward, [self.total_dur, par['batch_train_size']]) self.actions = tf.squeeze( tf.stack(self.actions) ) #tf.reshape(self.actions, [self.total_dur, par['batch_train_size']]) self.action_array = tf.stack(self.action_array) self.time_mask = tf.squeeze(tf.stack(self.time_mask)) self.time_mask = tf.cast(self.time_mask, dtype=tf.float64) self.logpi = tf.stack(self.logpi)
def rotate_image_tensor(image, angle, mode='black'): """ Rotates a 3D tensor (HWD), which represents an image by given radian angle. New image has the same size as the input image. mode controls what happens to border pixels. mode = 'black' results in black bars (value 0 in unknown areas) mode = 'white' results in value 255 in unknown areas mode = 'ones' results in value 1 in unknown areas mode = 'repeat' keeps repeating the closest pixel known """ s = tf.shape(image) assert s.get_shape()[0] == 3, "Input needs to be 3D." assert (mode == 'repeat') or (mode == 'black') or (mode == 'white') or (mode == 'ones'), "Unknown boundary mode." image_center = [tf.floor(tf.cast(s[0]/2, tf.float32)), tf.floor(tf.cast(s[1]/2, tf.float32))] # Coordinates of new image coord1 = tf.range(s[0]) coord2 = tf.range(s[1]) # Create vectors of those coordinates in order to vectorize the image coord1_vec = tf.tile(coord1, [s[1]]) coord2_vec_unordered = tf.tile(coord2, [s[0]]) coord2_vec_unordered = tf.reshape(coord2_vec_unordered, [s[0], s[1]]) coord2_vec = tf.reshape(tf.transpose(coord2_vec_unordered, [1, 0]), [-1]) # center coordinates since rotation center is supposed to be in the image center coord1_vec_centered = coord1_vec - tf.to_int32(image_center[0]) coord2_vec_centered = coord2_vec - tf.to_int32(image_center[1]) coord_new_centered = tf.cast(tf.stack([coord1_vec_centered, coord2_vec_centered]), tf.float32) # Perform backward transformation of the image coordinates rot_mat_inv = tf.dynamic_stitch([0, 1, 2, 3], [tf.cos(angle), tf.sin(angle), -tf.sin(angle), tf.cos(angle)]) rot_mat_inv = tf.reshape(rot_mat_inv, shape=[2, 2]) coord_old_centered = tf.matmul(rot_mat_inv, coord_new_centered) # Find nearest neighbor in old image coord1_old_nn = tf.cast(tf.round(coord_old_centered[0, :] + image_center[0]), tf.int32) coord2_old_nn = tf.cast(tf.round(coord_old_centered[1, :] + image_center[1]), tf.int32) # Clip values to stay inside image coordinates if mode == 'repeat': coord_old1_clipped = tf.minimum(tf.maximum(coord1_old_nn, 0), s[0]-1) coord_old2_clipped = tf.minimum(tf.maximum(coord2_old_nn, 0), s[1]-1) else: outside_ind1 = tf.logical_or(tf.greater(coord1_old_nn, s[0]-1), tf.less(coord1_old_nn, 0)) outside_ind2 = tf.logical_or(tf.greater(coord2_old_nn, s[1]-1), tf.less(coord2_old_nn, 0)) outside_ind = tf.logical_or(outside_ind1, outside_ind2) coord_old1_clipped = tf.boolean_mask(coord1_old_nn, tf.logical_not(outside_ind)) coord_old2_clipped = tf.boolean_mask(coord2_old_nn, tf.logical_not(outside_ind)) coord1_vec = tf.boolean_mask(coord1_vec, tf.logical_not(outside_ind)) coord2_vec = tf.boolean_mask(coord2_vec, tf.logical_not(outside_ind)) coord_old_clipped = tf.cast(tf.transpose(tf.stack([coord_old1_clipped, coord_old2_clipped]), [1, 0]), tf.int32) # Coordinates of the new image coord_new = tf.transpose(tf.cast(tf.stack([coord1_vec, coord2_vec]), tf.int32), [1, 0]) num_channels = image.get_shape().as_list()[2] image_channel_list = tf.split(image, num_channels, axis=2) image_rotated_channel_list = list() for image_channel in image_channel_list: image_chan_new_values = tf.gather_nd(tf.squeeze(image_channel), coord_old_clipped) if (mode == 'black') or (mode == 'repeat'): background_color = 0 elif mode == 'ones': background_color = 1 elif mode == 'white': background_color = 255 else: background_color = 0 image_rotated_channel_list.append(tf.sparse_to_dense(coord_new, [s[0], s[1]], image_chan_new_values, background_color, validate_indices=False)) image_rotated = tf.transpose(tf.stack(image_rotated_channel_list), [1, 
2, 0]) return image_rotated
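For the non-'repeat' modes in rotate_image_tensor, source coordinates that fall outside the image after the inverse rotation are flagged with tf.logical_or and discarded rather than clamped. A minimal sketch of that boundary test, assuming TensorFlow 2.x eager execution and toy coordinates:

import tensorflow as tf

coords = tf.constant([-2, 0, 3, 7, 5])  # nearest-neighbour source coordinates
size = 6                                # image extent along this axis

outside = tf.logical_or(tf.greater(coords, size - 1), tf.less(coords, 0))
kept = tf.boolean_mask(coords, tf.logical_not(outside))
print(kept.numpy())  # [0 3 5]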
def _decode_record(FLAGS, record, num_predict, seq_len, use_bfloat16=False, truncate_seq=False, stride=1): max_seq_length = seq_len record_spec = { "input_ori_ids": tf.FixedLenFeature([max_seq_length], tf.int64), "segment_ids": tf.FixedLenFeature([max_seq_length], tf.int64) } if FLAGS.sample_strategy in ["whole_word", "word_span"]: tf.logging.info("Add `boundary` spec for %s", FLAGS.sample_strategy) record_spec["boundary"] = tf.VarLenFeature(tf.int64) example = tf.parse_single_example(record, record_spec) inputs = example.pop("input_ori_ids") if FLAGS.sample_strategy in ["whole_word", "word_span"]: boundary = tf.sparse.to_dense(example.pop("boundary")) else: boundary = None if truncate_seq and stride > 1: tf.logging.info("Truncate pretrain sequence with stride %d", stride) # seq_len = 8, stride = 2: # [cls 1 2 sep 4 5 6 sep] => [cls 1 2 sep 4 5 sep pad] padding = tf.constant([FLAGS.sep_id] + [FLAGS.pad_id] * (stride - 1), dtype=inputs.dtype) inputs = tf.concat([inputs[:-stride], padding], axis=0) if boundary is not None: valid_boundary_mask = boundary < seq_len - stride boundary = tf.boolean_mask(boundary, valid_boundary_mask) is_target, target_mask = _online_sample_masks(FLAGS, inputs, seq_len, num_predict, boundary=boundary, stride=stride) masked_input = discrepancy_correction(FLAGS, inputs, is_target, seq_len) masked_input = tf.reshape(masked_input, [max_seq_length]) is_mask = tf.equal(masked_input, FLAGS.mask_id) is_pad = tf.equal(masked_input, FLAGS.pad_id) origin_input_mask = tf.equal(inputs, FLAGS.pad_id) masked_input *= (1 - tf.cast(origin_input_mask, dtype=tf.int64)) example["masked_input"] = masked_input example["origin_input"] = inputs example["is_target"] = tf.cast(is_target, dtype=tf.int64) * ( 1 - tf.cast(origin_input_mask, dtype=tf.int64)) # example["input_mask"] = tf.cast(tf.logical_or(is_mask, is_pad), tf.float32) # example["pad_mask"] = tf.cast(is_pad, tf.float32) input_mask = tf.logical_or(tf.logical_or(is_mask, is_pad), origin_input_mask) example["masked_mask"] = 1.0 - tf.cast(tf.logical_or(is_mask, is_pad), dtype=tf.float32) pad_mask = tf.logical_or(origin_input_mask, is_pad) example["pad_mask"] = 1.0 - tf.cast(pad_mask, tf.float32) # create target mapping create_target_mapping(example, is_target, seq_len, num_predict, target_mask=target_mask, target=inputs) example["masked_lm_positions"] = tf.argmax(example['target_mapping'], axis=-1) example["masked_lm_weights"] = example['target_mask'] example["masked_lm_ids"] = example['target'] # type cast for example convert_example(example, use_bfloat16) for k, v in example.items(): tf.logging.info("%s: %s", k, v) return example
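The float masks produced at the end of _decode_record are inverted combinations of boolean token conditions. A minimal sketch, assuming TensorFlow 2.x eager execution; the mask_id/pad_id values and token ids below are illustrative, not taken from the original configuration:

import tensorflow as tf

mask_id, pad_id = 6, 0
inputs = tf.constant([3, 4, 5, 0, 0], tf.int64)        # original ids
masked_input = tf.constant([3, 6, 5, 0, 0], tf.int64)  # after masking

is_mask = tf.equal(masked_input, mask_id)
is_pad = tf.equal(masked_input, pad_id)
orig_pad = tf.equal(inputs, pad_id)

# 1.0 for real, unmasked tokens; 0.0 for [MASK] positions and padding.
masked_mask = 1.0 - tf.cast(tf.logical_or(is_mask, is_pad), tf.float32)
# 1.0 for any real token (masked or not); 0.0 for padding only.
pad_mask = 1.0 - tf.cast(tf.logical_or(orig_pad, is_pad), tf.float32)
print(masked_mask.numpy(), pad_mask.numpy())  # [1. 0. 1. 0. 0.] [1. 1. 1. 0. 0.]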
def _get_qp_indices(g, low, high, x, eps=1e-6): c = tf.logical_or(tf.logical_and(tf.abs(x - low) < eps, g > 0), tf.logical_and(tf.abs(high - x) < eps, g < 0)) f = tf.logical_not(c) return f, c
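A possible reading of _get_qp_indices in a box-constrained setting: a coordinate is "clamped" when it sits on a bound and a gradient-descent step would push it outside the box, and "free" otherwise. A minimal usage sketch, assuming TensorFlow 2.x eager execution; the helper is restated so the snippet runs standalone, and the bounds and values are illustrative:

import tensorflow as tf

def _get_qp_indices(g, low, high, x, eps=1e-6):
    c = tf.logical_or(tf.logical_and(tf.abs(x - low) < eps, g > 0),
                      tf.logical_and(tf.abs(high - x) < eps, g < 0))
    f = tf.logical_not(c)
    return f, c

x = tf.constant([0.0, 0.5, 1.0])      # iterate inside the box [0, 1]
g = tf.constant([1.0, -2.0, -3.0])    # gradient of the objective at x
free, clamped = _get_qp_indices(g, 0.0, 1.0, x)
print(free.numpy(), clamped.numpy())  # [False  True False] [ True False  True]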
def _body(step, finished, state, inputs, outputs, attention, cum_log_probs, extra_vars): # Get log probs from the model. result = symbols_to_logits_fn(inputs, step, state) logits, state = result[0], result[1] attn = result[2] if len(result) > 2 else None logits = tf.cast(logits, tf.float32) # Penalize or force EOS. batch_size, vocab_size = misc.shape_list(logits) eos_max_prob = tf.one_hot( tf.fill([batch_size], end_id), vocab_size, on_value=logits.dtype.max, off_value=logits.dtype.min, ) logits = tf.cond( step < minimum_iterations, true_fn=lambda: _penalize_token(logits, end_id), false_fn=lambda: tf.where( tf.broadcast_to(tf.expand_dims(finished, -1), tf.shape(logits) ), x=eos_max_prob, y=logits, ), ) log_probs = tf.nn.log_softmax(logits) # Run one decoding strategy step. ( output, next_cum_log_probs, finished, state, extra_vars, ) = decoding_strategy.step( step, sampler, log_probs, cum_log_probs, finished, state=state, attention=attn, **extra_vars, ) # Update loop vars. if attention_history: if attn is None: raise ValueError( "attention_history is set but the model did not return attention" ) attention = attention.write(step, tf.cast(attn, tf.float32)) outputs = outputs.write(step, output) cum_log_probs = tf.where(finished, x=cum_log_probs, y=next_cum_log_probs) finished = tf.logical_or(finished, tf.equal(output, end_id)) return ( step + 1, finished, state, output, outputs, attention, cum_log_probs, extra_vars, )
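The decoding loop above keeps the finished flags "sticky": once a hypothesis emits the end token it stays finished on every later step. A minimal sketch of that update, assuming TensorFlow 2.x eager execution and toy values:

import tensorflow as tf

end_id = 2
finished = tf.constant([False, True, False])
output = tf.constant([2, 5, 7])  # token chosen at this step

finished = tf.logical_or(finished, tf.equal(output, end_id))
print(finished.numpy())  # [ True  True False]  (sequence 1 stays finished)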
def train_op(self): if self.conv5: tvars = [ var for var in tf.trainable_variables() if var.op.name.startswith('text_objseg') or var.name.startswith('res5') or var.name.startswith('res4') or var.name.startswith('res3') ] else: tvars = [ var for var in tf.trainable_variables() if var.op.name.startswith('text_objseg') ] if self.freeze_bn: tvars = [ var for var in tvars if 'beta' not in var.name and 'gamma' not in var.name ] reg_var_list = [ var for var in tvars if var.op.name.find(r'DW') > 0 or var.name[-9:-2] == 'weights' ] print('Collecting variables for regularization:') for var in reg_var_list: print('\t%s' % var.name) print('Done.') # define loss self.target = tf.image.resize_bilinear(self.target_fine, [self.vf_h, self.vf_w]) self.cls_loss_c5 = loss.weighed_logistic_loss(self.up_c5, self.target_fine, 1, 1) self.cls_loss_c4 = loss.weighed_logistic_loss(self.up_c4, self.target_fine, 1, 1) # self.cls_loss_c3 = loss.weighed_logistic_loss(self.up_c3, self.target_fine, 1, 1) self.cls_loss = loss.weighed_logistic_loss(self.up, self.target_fine, 1, 1) self.cls_loss_all = 0.8 * self.cls_loss + 0.1 * self.cls_loss_c5 \ + 0.1 * self.cls_loss_c4 self.reg_loss = loss.l2_regularization_loss(reg_var_list, self.weight_decay) self.cost = self.cls_loss_all + self.reg_loss # learning rate self.train_step = tf.Variable(0, trainable=False) self.learning_rate = tf.train.polynomial_decay( self.start_lr, self.train_step, self.lr_decay_step, end_learning_rate=0.00001, power=0.9) # optimizer if self.optimizer == 'adam': optimizer = tf.train.AdamOptimizer(self.learning_rate) else: raise ValueError("Unknown optimizer type %s!" % self.optimizer) # learning rate multiplier grads_and_vars = optimizer.compute_gradients(self.cost, var_list=tvars) var_lr_mult = {} for var in tvars: if var.op.name.find(r'biases') > 0: var_lr_mult[var] = 2.0 elif var.name.startswith('res5') or var.name.startswith( 'res4') or var.name.startswith('res3'): var_lr_mult[var] = 1.0 else: var_lr_mult[var] = 1.0 print('Variable learning rate multiplication:') for var in tvars: print('\t%s: %f' % (var.name, var_lr_mult[var])) print('Done.') grads_and_vars = [ ((g if var_lr_mult[v] == 1 else tf.multiply(var_lr_mult[v], g)), v) for g, v in grads_and_vars ] # training step update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): self.train = optimizer.apply_gradients(grads_and_vars, global_step=self.train_step) # Summary in tensorboard tf.summary.scalar('loss_all', self.cls_loss_all) # tf.summary.scalar('loss_c3', self.cls_loss_c3) tf.summary.scalar('loss_c4', self.cls_loss_c4) tf.summary.scalar('loss_c5', self.cls_loss_c5) tf.summary.scalar('loss_last', self.cls_loss) pred = tf.convert_to_tensor(tf.cast(self.up > 0, tf.int32), tf.int32) labl = self.target_fine intersect = tf.reduce_sum(tf.cast( tf.logical_and(tf.cast(pred, tf.bool), tf.cast(labl, tf.bool)), tf.int32), axis=(1, 2, 3)) union = tf.reduce_sum(tf.cast( tf.logical_or(tf.cast(pred, tf.bool), tf.cast(labl, tf.bool)), tf.int32), axis=(1, 2, 3)) self.mIoU = tf.reduce_mean(tf.divide(intersect, union)) tf.summary.scalar('mean_IOU', self.mIoU) self.merged = tf.summary.merge_all()
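The mIoU summary above intersects and unions binary masks with tf.logical_and / tf.logical_or before counting pixels. A minimal sketch, assuming TensorFlow 2.x eager execution and a toy 1x4 mask:

import tensorflow as tf

pred = tf.constant([[1, 1, 0, 0]])   # predicted binary mask, [batch, pixels]
label = tf.constant([[1, 0, 1, 0]])

pred_b, label_b = tf.cast(pred, tf.bool), tf.cast(label, tf.bool)
intersect = tf.reduce_sum(tf.cast(tf.logical_and(pred_b, label_b), tf.float32), axis=1)
union = tf.reduce_sum(tf.cast(tf.logical_or(pred_b, label_b), tf.float32), axis=1)
miou = tf.reduce_mean(intersect / union)
print(miou.numpy())  # 1 overlapping pixel / 3 covered pixels ~= 0.333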
def _finished_flags(self, topk_ids, state) -> tf.Tensor: new_finished_flags = tf.equal(topk_ids, self.eos_id) new_finished_flags = tf.logical_or( new_finished_flags, state[decoding_module.StateKeys.FINISHED_FLAGS]) return new_finished_flags
def construct(self, args, source_chars, target_chars, bow, eow): with self.session.graph.as_default(): if args.recodex: tf.get_variable_scope().set_initializer(tf.glorot_uniform_initializer(seed=42)) # Inputs self.sentence_lens = tf.placeholder(tf.int32, [None], name="sentence_lens") self.source_ids = tf.placeholder(tf.int32, [None, None], name="source_ids") self.source_seqs = tf.placeholder(tf.int32, [None, None], name="source_seqs") self.source_seq_lens = tf.placeholder(tf.int32, [None], name="source_seq_lens") self.target_ids = tf.placeholder(tf.int32, [None, None], name="target_ids") self.target_seqs = tf.placeholder(tf.int32, [None, None], name="target_seqs") self.target_seq_lens = tf.placeholder(tf.int32, [None], name="target_seq_lens") # Append EOW after target_seqs target_seqs = tf.reverse_sequence(self.target_seqs, self.target_seq_lens, 1) target_seqs = tf.pad(target_seqs, [[0, 0], [1, 0]], constant_values=eow) target_seq_lens = self.target_seq_lens + 1 target_seqs = tf.reverse_sequence(target_seqs, target_seq_lens, 1) # Encoder # TODO: Generate source embeddings for source chars, of shape [source_chars, args.char_dim]. # TODO: Embed the self.source_seqs using the source embeddings. # TODO: Using a GRU with dimension args.rnn_dim, process the embedded self.source_seqs # using forward RNN and store the resulting states into `source_states`. # Index the unique words using self.source_ids and self.target_ids. sentence_mask = tf.sequence_mask(self.sentence_lens) source_states = tf.boolean_mask(tf.nn.embedding_lookup(source_states, self.source_ids), sentence_mask) source_lens = tf.boolean_mask(tf.nn.embedding_lookup(self.source_seq_lens, self.source_ids), sentence_mask) target_seqs = tf.boolean_mask(tf.nn.embedding_lookup(target_seqs, self.target_ids), sentence_mask) target_lens = tf.boolean_mask(tf.nn.embedding_lookup(target_seq_lens, self.target_ids), sentence_mask) # Decoder # TODO: Generate target embeddings for target chars, of shape [target_chars, args.char_dim]. # TODO: Embed the target_seqs using the target embeddings. # TODO: Generate a decoder GRU with wimension args.rnn_dim. # TODO: Create a `decoder_layer` -- a fully connected layer with # target_chars neurons used in the decoder to classify into target characters. # The DecoderTraining will be used during training. It will output logits for each # target character. class DecoderTraining(tf.contrib.seq2seq.Decoder): @property def batch_size(self): return # TODO: Return size of the batch, using for example source_states size @property def output_dtype(self): return tf.float32 # Type for logits of target characters @property def output_size(self): return target_chars # Length of logits for every output def initialize(self, name=None): finished = # TODO: False if target_lens > 0, True otherwise states = # TODO: Initial decoder state to use inputs = # TODO: embedded BOW characters of shape [self.batch_size] using target embeddings. # You can use tf.fill to generate BOWs of appropriate size. return finished, inputs, states def step(self, time, inputs, states, name=None): outputs, states = # TODO: Run the decoder GRU cell using inputs and states. outputs = # TODO: Apply the decoder_layer on outputs. next_input = # TODO: Next input are words with index `time` in target_embedded. finished = # TODO: False if target_lens > time + 1, True otherwise. 
return outputs, states, next_input, finished output_layer, _, _ = tf.contrib.seq2seq.dynamic_decode(DecoderTraining()) self.predictions_training = tf.argmax(output_layer, axis=2, output_type=tf.int32) # The DecoderPrediction will be used during prediction. It will # directly output the predicted target characters. class DecoderPrediction(tf.contrib.seq2seq.Decoder): @property def batch_size(self): return # TODO: Return size of the batch, using for example source_states size @property def output_dtype(self): return tf.int32 # Type for predicted target characters @property def output_size(self): return 1 # Will return just one output def initialize(self, name=None): finished = # TODO: False of shape [self.batch_size]. states = # TODO: Initial decoder state to use. inputs = # TODO: embedded BOW characters of shape [self.batch_size] using target embeddings. # You can use tf.fill to generate BOWs of appropriate size. return finished, inputs, states def step(self, time, inputs, states, name=None): outputs, states = # TODO: Run the decoder GRU cell using inputs and states. outputs = # TODO: Apply the decoder_layer on outputs. outputs = # TODO: Use tf.argmax to choose most probable class (supply parameter `output_type=tf.int32`). next_input = # TODO: Embed `outputs` using target_embeddings finished = # TODO: True where outputs==eow, False otherwise # Use tf.equal for the comparison, Python's '==' is not overloaded return outputs, states, next_input, finished self.predictions, _, self.prediction_lens = tf.contrib.seq2seq.dynamic_decode( DecoderPrediction(), maximum_iterations=tf.reduce_max(source_lens) + 10) # Training weights = tf.sequence_mask(target_lens, dtype=tf.float32) loss = tf.losses.sparse_softmax_cross_entropy(target_seqs, output_layer, weights=weights) global_step = tf.train.create_global_step() self.training = tf.train.AdamOptimizer().minimize(loss, global_step=global_step, name="training") # Summaries accuracy_training = tf.reduce_all(tf.logical_or( tf.equal(self.predictions_training, target_seqs), tf.logical_not(tf.sequence_mask(target_lens))), axis=1) self.current_accuracy_training, self.update_accuracy_training = tf.metrics.mean(accuracy_training) minimum_length = tf.minimum(tf.shape(self.predictions)[1], tf.shape(target_seqs)[1]) accuracy = tf.logical_and( tf.equal(self.prediction_lens, target_lens), tf.reduce_all(tf.logical_or( tf.equal(self.predictions[:, :minimum_length], target_seqs[:, :minimum_length]), tf.logical_not(tf.sequence_mask(target_lens, maxlen=minimum_length))), axis=1)) self.current_accuracy, self.update_accuracy = tf.metrics.mean(accuracy) self.current_loss, self.update_loss = tf.metrics.mean(loss, weights=tf.reduce_sum(weights)) self.reset_metrics = tf.variables_initializer(tf.get_collection(tf.GraphKeys.METRIC_VARIABLES)) summary_writer = tf.contrib.summary.create_file_writer(args.logdir, flush_millis=10 * 1000) self.summaries = {} with summary_writer.as_default(), tf.contrib.summary.record_summaries_every_n_global_steps(10): self.summaries["train"] = [tf.contrib.summary.scalar("train/loss", self.update_loss), tf.contrib.summary.scalar("train/accuracy", self.update_accuracy_training)] with summary_writer.as_default(), tf.contrib.summary.always_record_summaries(): for dataset in ["dev", "test"]: self.summaries[dataset] = [tf.contrib.summary.scalar(dataset + "/loss", self.current_loss), tf.contrib.summary.scalar(dataset + "/accuracy", self.current_accuracy)] # Initialize variables self.session.run(tf.global_variables_initializer()) with 
summary_writer.as_default(): tf.contrib.summary.initialize(session=self.session, graph=self.session.graph)
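The word-level accuracy in construct counts a prediction as correct only when every non-padding character matches, which is expressed by OR-ing the per-character match with the inverted sequence mask before tf.reduce_all. A minimal sketch of that idea, assuming TensorFlow 2.x eager execution and toy sequences:

import tensorflow as tf

predictions = tf.constant([[4, 5, 9],
                           [7, 1, 1]])
targets = tf.constant([[4, 5, 0],
                       [7, 2, 0]])
lengths = tf.constant([2, 2])

valid = tf.sequence_mask(lengths, maxlen=tf.shape(targets)[1])
# Positions beyond the target length are forced to True so they cannot fail the check.
correct = tf.reduce_all(
    tf.logical_or(tf.equal(predictions, targets), tf.logical_not(valid)), axis=1)
print(correct.numpy())  # [ True False]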
def loop_fn(time, cell_output, cell_state, loop_state):
    if cell_output is None:  # time == 0
        next_cell_state = encoder_states
        next_input = tf.tile(go_embedding, to_T([N, 1]))
    else:  # time > 0
        next_cell_state = cell_state

        # compute the attention map over the input sequence
        # a_raw has shape [T, N, 1]
        att_raw = tf.reduce_sum(
            tf.tanh(tf.nn.xw_plus_b(cell_output, W_a, b_a) +
                    self.encoder_h_transformed) * v,
            axis=2, keep_dims=True)
        # softmax along the first dimension (T) over not finished examples
        # att has shape [T, N, 1]
        att = tf.nn.softmax(att_raw, dim=0) * self.seq_not_finished
        att = att / tf.reduce_sum(att, axis=0, keep_dims=True)
        # d has shape [N, lstm_dim]
        d2 = tf.reduce_sum(att * self.encoder_outputs, axis=0)

        # token_scores has shape [N, num_vocab]
        token_scores = tf.nn.xw_plus_b(
            tf.concat([cell_output, d2], axis=1), W_y, b_y)

        # predict the next token (behavior depending on parameters)
        if sampling:
            # predicted_token has shape [N]
            logits = token_scores
            predicted_token = tf.cast(
                tf.reshape(tf.multinomial(token_scores, 1), [-1]), tf.int32)
        else:
            # predicted_token has shape [N]
            predicted_token = tf.cast(tf.argmax(token_scores, 1), tf.int32)
        if use_gt_layout is not None:
            predicted_token = (gt_layout_batch[time - 1] * gt_layout_mult +
                               predicted_token * pred_layout_mult)

        # token_prob has shape [N], the probability of the predicted token
        # although token_prob is not needed for predicting the next token
        # it is needed in output (for policy gradient training)
        # [N, num_vocab]
        # mask has shape [N, num_vocab]
        mask = tf.equal(mask_range, tf.reshape(predicted_token, [-1, 1]))
        all_token_probs = tl.activation.pixel_wise_softmax(token_scores)
        token_prob = tf.reduce_sum(
            all_token_probs * tf.cast(mask, tf.float32), axis=1)
        neg_entropy = tf.reduce_sum(
            all_token_probs * tf.log(tf.maximum(1e-5, all_token_probs)), axis=1)

        # is_eos_predicted is a [N] bool tensor, indicating whether
        # <eos> has already been predicted previously in each sequence
        is_eos_predicted = loop_state[2]
        predicted_token_old = predicted_token
        # if <eos> has already been predicted, now predict <eos> with prob 1
        predicted_token = tf.where(is_eos_predicted, all_eos_pred, predicted_token)
        token_prob = tf.where(is_eos_predicted, all_one_prob, token_prob)
        neg_entropy = tf.where(is_eos_predicted, all_zero_entropy, neg_entropy)
        is_eos_predicted = tf.logical_or(
            is_eos_predicted, tf.equal(predicted_token_old, EOS_token))

        # the prediction is from the cell output of the last step
        # timestep (t-1), feed it as input into timestep t
        next_input = tf.nn.embedding_lookup(embedding_mat, predicted_token)

    elements_finished = tf.greater_equal(time, T_max)

    # loop_state is a 5-tuple, representing
    #   1) the predicted_tokens
    #   2) the prob of predicted_tokens
    #   3) whether <eos> has already been predicted
    #   4) the negative entropy of policy (accumulated across timesteps)
    #   5) the attention
    if loop_state is None:  # time == 0
        # Write the predicted token into the output
        predicted_token_array = tf.TensorArray(dtype=tf.int32, size=T_max,
                                               infer_shape=False)
        token_prob_array = tf.TensorArray(dtype=tf.float32, size=T_max,
                                          infer_shape=False)
        att_array = tf.TensorArray(dtype=tf.float32, size=T_max,
                                   infer_shape=False)
        next_loop_state = (predicted_token_array,
                           token_prob_array,
                           tf.zeros(to_T([N]), dtype=tf.bool),
                           tf.zeros(to_T([N]), dtype=tf.float32),
                           att_array)
    else:  # time > 0
        t_write = time - 1
        next_loop_state = (loop_state[0].write(t_write, predicted_token),
                           loop_state[1].write(t_write, token_prob),
                           is_eos_predicted,
                           loop_state[3] + neg_entropy,
                           loop_state[4].write(t_write, att))
    return (elements_finished, next_input, next_cell_state,
            cell_output, next_loop_state)
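# Illustrative sketch (not the author's code): the contract a loop_fn like the one
# above has to satisfy for tf.nn.raw_rnn, shown with the plain input-feeding pattern
# from the TF 1.x documentation. All sizes and names below are made up.
import tensorflow as tf

batch_size, max_time, input_depth, num_units = 4, 7, 16, 32
inputs = tf.random_normal([max_time, batch_size, input_depth])   # time-major
sequence_length = tf.fill([batch_size], max_time)
inputs_ta = tf.TensorArray(dtype=tf.float32, size=max_time).unstack(inputs)
cell = tf.nn.rnn_cell.LSTMCell(num_units)

def simple_loop_fn(time, cell_output, cell_state, loop_state):
    # raw_rnn calls this once with cell_output=None before the loop, then once per step.
    emit_output = cell_output  # None on the first call defines the emit structure
    if cell_output is None:    # time == 0
        next_cell_state = cell.zero_state(batch_size, tf.float32)
    else:
        next_cell_state = cell_state
    elements_finished = (time >= sequence_length)
    finished = tf.reduce_all(elements_finished)
    next_input = tf.cond(
        finished,
        lambda: tf.zeros([batch_size, input_depth], dtype=tf.float32),
        lambda: inputs_ta.read(time))
    return (elements_finished, next_input, next_cell_state, emit_output, None)

outputs_ta, final_state, _ = tf.nn.raw_rnn(cell, simple_loop_fn)
outputs = outputs_ta.stack()   # [max_time, batch_size, num_units]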
def run_step2(
        time,
        maximum_iterations,
        pre_id,
        predict_ta,
        n_queue_ta,
        q_start_index,
        q_end_index,
        hidden_state,
        sos_id,
        non_terminal_id,
        eos_id,
        left_bracket_id,
        right_bracket_id,
        seq_end,
):
    cur_id = pre_id
    cur_embed = tf.reshape(
        tf.nn.embedding_lookup(self.tgt_embeddings, cur_id), shape=[1, -1])

    def infer_true_fn(q_start_index, n_queue_ta, hidden_state):
        state = n_queue_ta.read(q_start_index)
        # state = tf.Print(state, [state])
        q_start_index = q_start_index + 1
        return ((state[0][:][:], state[1][:][:]), q_start_index, n_queue_ta)

    def infer_false_fn(q_start_index, n_queue_ta, hidden_state):
        return (hidden_state, q_start_index, n_queue_ta)

    # if the previous id is sos or left_bracket, get the previous hidden_state from n_queue_ta
    infer_condition = tf.logical_and(
        tf.logical_or(tf.equal(cur_id, sos_id),
                      tf.equal(cur_id, left_bracket_id)),
        tf.less(q_start_index, q_end_index))
    pre_state, q_start_index, n_queue_ta = tf.cond(
        infer_condition,
        true_fn=lambda: infer_true_fn(q_start_index, n_queue_ta, hidden_state),
        false_fn=lambda: infer_false_fn(q_start_index, n_queue_ta, hidden_state))

    call_cell = lambda: cell(cur_embed, pre_state)

    def output_state_true_fn(pre_state):
        return (create_zero_array(shape=[1, cell.output_size], dtype=tf.float32),
                pre_state)

    # if at a seq_end, return a zero output and the unchanged hidden_state,
    # else update output and state
    # infer_condition1 = tf.logical_and(tf.equal(cur_id, eos_id), seq_end)
    new_output, new_state = tf.cond(
        seq_end,
        true_fn=lambda: output_state_true_fn(pre_state),
        false_fn=call_cell)
    # new_output, new_state = call_cell()
    print('new_output:', new_output)
    logit = dense(new_output)
    print('logit:', logit)
    output_id = tf.reshape(
        tf.cast(tf.argmax(logit, axis=-1), dtype=tf.int32), shape=())
    print('output_id:', output_id)

    def seq_end_true_fn(eos_id):
        return eos_id

    def seq_end_false_fn(output_id):
        return output_id

    output_id = tf.cond(
        seq_end,
        true_fn=lambda: seq_end_true_fn(eos_id),
        false_fn=lambda: seq_end_false_fn(output_id))

    def infer_true_fn2(output_id, left_bracket_id):
        return left_bracket_id, tf.constant(False)

    def infer_false_fn2(output_id, eos_id, q_start_index, q_end_index,
                        left_bracket_id):

        def inner_true_fn():
            return tf.constant(True)

        def inner_false_fn():
            return tf.constant(False)

        inner_condition = tf.logical_and(
            tf.equal(output_id, eos_id),
            tf.greater_equal(q_start_index, q_end_index))
        seq_end = tf.cond(inner_condition,
                          true_fn=lambda: inner_true_fn(),
                          false_fn=lambda: inner_false_fn())
        # possibly return eos_id
        return output_id, seq_end

    # n_queue_ta is not empty, i.e. it still holds a non_terminal_id
    infer_condition2 = tf.logical_and(
        tf.equal(output_id, eos_id),
        tf.less(q_start_index, q_end_index))
    # if n_queue_ta still holds a non_terminal_id, keep decoding the subsequence
    # and return a left_bracket_id; otherwise the sequence has ended, return an eos_id
    output_id, seq_end = tf.cond(
        infer_condition2,
        true_fn=lambda: infer_true_fn2(output_id, left_bracket_id),
        false_fn=lambda: infer_false_fn2(output_id, eos_id, q_start_index,
                                         q_end_index, left_bracket_id))

    logit = tf.reshape(logit, shape=[tf.shape(logit)[-1]])
    predict_ta = predict_ta.write(time, logit)

    def infer_true_fn3(state, n_queue_ta, q_end_index):
        n_queue_ta = n_queue_ta.write(q_end_index, state)
        q_end_index = q_end_index + 1
        return (q_end_index, n_queue_ta)

    def infer_false_fn3(q_end_index, n_queue_ta):
        return (q_end_index, n_queue_ta)

    infer_condition3 = tf.equal(output_id, non_terminal_id)
    q_end_index, n_queue_ta = tf.cond(
        infer_condition3,
        true_fn=lambda: infer_true_fn3(new_state, n_queue_ta, q_end_index),
        false_fn=lambda: infer_false_fn3(q_end_index, n_queue_ta))

    return (time + 1, maximum_iterations, output_id, predict_ta, n_queue_ta,
            q_start_index, q_end_index, new_state, sos_id, non_terminal_id,
            eos_id, left_bracket_id, right_bracket_id, seq_end)
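# Illustrative sketch (not the author's driver code): the kind of tf.while_loop a
# step function with this signature is typically plugged into. The toy body below
# just emits dummy logits into a TensorArray and raises seq_end after five steps;
# all names and values here are made up.
import tensorflow as tf

maximum_iterations = 10
predict_ta = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
time = tf.constant(0)
seq_end = tf.constant(False)

def toy_step(time, predict_ta, seq_end):
    # Stand-in for run_step2: write a dummy logit and stop after 5 steps.
    logit = tf.cast(time, tf.float32) * tf.ones([3])
    predict_ta = predict_ta.write(time, logit)
    seq_end = tf.greater_equal(time + 1, 5)
    return time + 1, predict_ta, seq_end

def cond(time, predict_ta, seq_end):
    # Keep looping while the sequence has not ended and we are under the cap.
    return tf.logical_and(tf.logical_not(seq_end),
                          tf.less(time, maximum_iterations))

time, predict_ta, seq_end = tf.while_loop(
    cond, toy_step, loop_vars=[time, predict_ta, seq_end])
logits = predict_ta.stack()   # [num_steps, 3]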
def add_train_ops(self, model):
    """ Add training ops on top of the model """

    ##############
    # Training ops
    ##############

    with tf.variable_scope('optimizer'):

        # Learning rate as a Variable so we can modify it
        self.learning_rate = tf.Variable(model.config.learning_rate,
                                         trainable=False,
                                         name='learning_rate')

        # Create the gradient descent optimizer with the given learning rate.
        optimizer = tf.train.MomentumOptimizer(self.learning_rate,
                                               model.config.momentum)

        # Training step op
        gvs = optimizer.compute_gradients(model.loss)

        if model.config.grad_clip_norm > 0:

            # Get gradients for deformable convolutions and scale them
            scaled_gvs = []
            for grad, var in gvs:
                if 'offset_conv' in var.name:
                    scaled_gvs.append((0.1 * grad, var))
                elif 'offset_mlp' in var.name:
                    scaled_gvs.append((0.1 * grad, var))
                else:
                    scaled_gvs.append((grad, var))

            # Clipping each gradient independently
            capped_gvs = [(tf.clip_by_norm(grad, model.config.grad_clip_norm), var)
                          for grad, var in scaled_gvs]

            # Clipping the whole network gradient (problematic with big networks where grad == inf)
            # capped_grads, global_norm = tf.clip_by_global_norm([grad for grad, var in gvs], self.config.grad_clip_norm)
            # vars = [var for grad, var in gvs]
            # capped_gvs = [(grad, var) for grad, var in zip(capped_grads, vars)]

            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(extra_update_ops):
                self.train_op = optimizer.apply_gradients(capped_gvs)

        else:
            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(extra_update_ops):
                self.train_op = optimizer.apply_gradients(gvs)

    ############
    # Result ops
    ############

    # Add the Op to compare the logits to the labels during evaluation.
    with tf.variable_scope('results'):

        if len(model.config.ignored_label_inds) > 0:

            # Boolean mask of points that should be ignored
            ignored_bool = tf.zeros_like(model.labels, dtype=tf.bool)
            for ign_label in model.config.ignored_label_inds:
                ignored_bool = tf.logical_or(ignored_bool,
                                             tf.equal(model.labels, ign_label))

            # Collect logits and labels that are not ignored
            inds = tf.squeeze(tf.where(tf.logical_not(ignored_bool)))
            new_logits = tf.gather(model.logits, inds, axis=0)
            new_labels = tf.gather(model.labels, inds, axis=0)

            # Reduce label values in the range of logit shape
            reducing_list = tf.range(model.config.num_classes, dtype=tf.int32)
            inserted_value = tf.zeros((1,), dtype=tf.int32)
            for ign_label in model.config.ignored_label_inds:
                reducing_list = tf.concat([reducing_list[:ign_label],
                                           inserted_value,
                                           reducing_list[ign_label:]], 0)
            new_labels = tf.gather(reducing_list, new_labels)

            # Metrics
            self.correct_prediction = tf.nn.in_top_k(new_logits, new_labels, 1)
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))
            self.prob_logits = tf.nn.softmax(new_logits)

        else:

            # Metrics
            self.correct_prediction = tf.nn.in_top_k(model.logits, model.labels, 1)
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))
            self.prob_logits = tf.nn.softmax(model.logits)

    return
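# Minimal standalone sketch of the gradient handling above: scale the gradients of
# selected variables, clip each gradient by norm, and apply updates under the
# UPDATE_OPS control dependencies. The toy model, scope names and clip value are
# assumptions, not the original configuration.
import tensorflow as tf

x = tf.random_normal([8, 4])
with tf.variable_scope('offset_conv'):
    w1 = tf.get_variable('w', [4, 4])
with tf.variable_scope('head'):
    w2 = tf.get_variable('w', [4, 1])
loss = tf.reduce_mean(tf.matmul(tf.matmul(x, w1), w2) ** 2)

optimizer = tf.train.MomentumOptimizer(0.01, 0.9)
gvs = optimizer.compute_gradients(loss)

scaled_gvs = []
for grad, var in gvs:
    if grad is None:
        continue
    # Deformable-offset style variables get a smaller effective learning rate.
    scale = 0.1 if 'offset_conv' in var.name else 1.0
    scaled_gvs.append((scale * grad, var))

# Clip each gradient independently rather than by global norm.
capped_gvs = [(tf.clip_by_norm(g, 100.0), v) for g, v in scaled_gvs]

extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(extra_update_ops):
    train_op = optimizer.apply_gradients(capped_gvs)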
def dynamic_interactive_decode(decoder,
                               output_time_major=False,
                               impute_finished=False,
                               maximum_iterations=None,
                               parallel_iterations=32,
                               swap_memory=False,
                               scope=None):
    with tf.variable_scope(scope, "decoder") as varscope:
        if varscope.caching_device is None:
            varscope.set_caching_device(lambda op: op.device)

        if maximum_iterations is not None:
            maximum_iterations = tf.convert_to_tensor(maximum_iterations,
                                                      dtype=tf.int32,
                                                      name="maximum_iterations")
            if maximum_iterations.get_shape().ndims != 0:
                raise ValueError("maximum_iterations must be a scalar")

        (initial_finished, initial_inputs, initial_state,
         initial_history_masking, initial_interesting) = decoder.initialize()

        zero_outputs = _create_zero_outputs(decoder.output_size,
                                            decoder.output_dtype,
                                            decoder.batch_size)

        if maximum_iterations is not None:
            initial_finished = tf.logical_or(initial_finished,
                                             0 >= maximum_iterations)
        initial_sequence_lengths = tf.zeros_like(initial_finished, dtype=tf.int32)
        initial_time = tf.constant(0, dtype=tf.int32)

        def _shape(batch_size, from_shape):
            if (not isinstance(from_shape, tf.TensorShape)
                    or from_shape.ndims == 0):
                return tf.TensorShape(None)
            else:
                batch_size = tensor_util.constant_value(
                    ops.convert_to_tensor(batch_size, name="batch_size"))
                return tf.TensorShape([batch_size]).concatenate(from_shape)

        def _create_ta(s, d):
            return tf.TensorArray(dtype=d,
                                  size=0,
                                  dynamic_size=True,
                                  element_shape=_shape(decoder.batch_size, s))

        initial_outputs_ta = nest.map_structure(_create_ta,
                                                decoder.output_size,
                                                decoder.output_dtype)
        initial_hit_ta = nest.map_structure(_create_ta,
                                            decoder.hit_size,
                                            decoder.hit_dtype)

        def condition(unused_time, unused_outputs_ta, unused_state,
                      unused_inputs, unused_history_masking, unused_hit,
                      finished, unused_sequence_lengths):
            return math_ops.logical_not(math_ops.reduce_all(finished))

        def body(time, outputs_ta, state, inputs, history_masking, hit_ta,
                 finished, sequence_lengths):
            (next_outputs, decoder_state, next_inputs, next_history_masking,
             next_hit, decoder_finished) = decoder.step(time, inputs, state,
                                                        history_masking)
            if decoder.tracks_own_finished:
                next_finished = decoder_finished
            else:
                next_finished = math_ops.logical_or(decoder_finished, finished)
            if maximum_iterations is not None:
                next_finished = math_ops.logical_or(
                    next_finished, time + 1 >= maximum_iterations)
            next_sequence_lengths = array_ops.where(
                math_ops.logical_and(math_ops.logical_not(finished), next_finished),
                array_ops.fill(array_ops.shape(sequence_lengths), time + 1),
                sequence_lengths)

            nest.assert_same_structure(state, decoder_state)
            nest.assert_same_structure(outputs_ta, next_outputs)
            nest.assert_same_structure(inputs, next_inputs)
            nest.assert_same_structure(history_masking, next_history_masking)
            nest.assert_same_structure(hit_ta, next_hit)

            if impute_finished:
                emit = nest.map_structure(
                    lambda out, zero: array_ops.where(finished, zero, out),
                    next_outputs, zero_outputs)
            else:
                emit = next_outputs

            def _maybe_copy_state(new, cur):
                if isinstance(cur, tensor_array_ops.TensorArray):
                    pass_through = True
                else:
                    new.set_shape(cur.shape)
                    pass_through = (new.shape.ndims == 0)
                return new if pass_through else array_ops.where(finished, cur, new)

            if impute_finished:
                next_state = nest.map_structure(_maybe_copy_state,
                                                decoder_state, state)
            else:
                next_state = decoder_state

            outputs_ta = nest.map_structure(lambda ta, out: ta.write(time, out),
                                            outputs_ta, emit)
            hit_ta = nest.map_structure(lambda ta, out: ta.write(time, out),
                                        hit_ta, next_hit)
            return (time + 1, outputs_ta, next_state, next_inputs,
                    next_history_masking, hit_ta, next_finished,
                    next_sequence_lengths)

        res = tf.while_loop(condition,
                            body,
                            loop_vars=[
                                initial_time, initial_outputs_ta, initial_state,
                                initial_inputs, initial_history_masking,
                                initial_hit_ta, initial_finished,
                                initial_sequence_lengths,
                            ],
                            parallel_iterations=parallel_iterations,
                            swap_memory=swap_memory)

        final_outputs_ta = res[1]
        final_state = res[2]
        final_history_masking = res[4]
        final_hit_ta = res[5]
        final_sequence_lengths = res[-1]

        final_outputs = nest.map_structure(lambda ta: ta.stack(), final_outputs_ta)
        final_hit = nest.map_structure(lambda ta: ta.stack(), final_hit_ta)

        try:
            final_outputs, final_state = decoder.finalize(final_outputs,
                                                          final_state,
                                                          final_sequence_lengths)
        except NotImplementedError:
            pass

        if not output_time_major:
            final_outputs = nest.map_structure(rnn._transpose_batch_time,
                                               final_outputs)
            final_hit = nest.map_structure(rnn._transpose_batch_time, final_hit)

        return (final_outputs, final_state, final_history_masking, final_hit,
                final_sequence_lengths)
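# For reference, a hedged sketch of the stock tf.contrib.seq2seq.dynamic_decode call
# that this custom routine mirrors (the routine above additionally threads
# history_masking and hit outputs through the loop). The cell, vocabulary size and
# start/end token ids below are illustrative assumptions.
import tensorflow as tf

vocab_size, emb_dim, num_units, batch_size = 100, 32, 64, 4
embedding = tf.get_variable('embedding', [vocab_size, emb_dim])
cell = tf.nn.rnn_cell.GRUCell(num_units)
output_layer = tf.layers.Dense(vocab_size)

helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
    embedding,
    start_tokens=tf.fill([batch_size], 1),   # assumed <sos> id
    end_token=2)                             # assumed <eos> id
decoder = tf.contrib.seq2seq.BasicDecoder(
    cell, helper,
    initial_state=cell.zero_state(batch_size, tf.float32),
    output_layer=output_layer)
outputs, final_state, final_seq_lens = tf.contrib.seq2seq.dynamic_decode(
    decoder, impute_finished=True, maximum_iterations=20)
predicted_ids = outputs.sample_id   # [batch_size, <=20]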
def compute_gradients(self, loss, var_list=None, *args, **kwargs):
    """Compute gradients to model variables from loss.

    Args:
        loss (tf.Tensor): Tensorflow loss to optimize.

    Returns:
        (list): Per-optimizer lists of (gradient, variable) pairs, with a
            clipping operation applied if `self.clip` is True.
    """
    # freeze all variables except those with self.trainable_scope in their names
    if not isinstance(loss, list):
        loss = [loss]
    assert (len(loss) == len(self._optimizer_class))
    if self.trainable_scope is not None:
        new_var_list = [
            v for v in var_list
            if any([nm in v.name for nm in self.trainable_scope])
        ]
        if len(new_var_list):
            var_list = new_var_list
            log.info("Only training variables in scope: %s" % self.trainable_scope)
            log.info("variables to be trained: %s" % var_list)

    if var_list is not None:
        num_trainable_params = sum(
            [np.prod(v.shape.as_list()) for v in var_list])
        log.info("Number of Trainable Parameters: %d" % num_trainable_params)

    gvs_list = []
    for opt_idx, curr_opt_func in enumerate(self._optimizers):
        gvs = curr_opt_func.compute_gradients(loss[opt_idx],
                                              var_list=var_list,
                                              *args,
                                              **kwargs)
        if self.clip:
            if self.clipping_method == "value":
                # gradient clipping. Some gradients returned are 'None' because
                # there is no relation between the variable and the loss, so we skip those.
                gvs = [(tf.clip_by_value(grad, -self.clipping_value,
                                         self.clipping_value), var)
                       for grad, var in gvs if grad is not None]
            elif self.clipping_method == "norm":
                print("USING GLOBAL NORM CLIPPING with clip_value %.2f" %
                      self.clipping_value)
                gradients, variables = zip(*gvs)
                norm = tf.global_norm(gradients)
                if self.print_global_norm:
                    norm = tf.Print(norm, [norm], message="grad_global_norm")
                true_fn = lambda: tf.constant(1.0)
                false_fn = lambda: tf.identity(norm)
                norm = tf.case(
                    [(tf.logical_or(tf.is_inf(norm), tf.is_nan(norm)), true_fn)],
                    default=false_fn)
                gradients, global_norm = tf.clip_by_global_norm(
                    gradients, self.clipping_value, use_norm=norm)
                gvs = zip(gradients, variables)
            else:
                raise ValueError(
                    "optimizer.clip = True but you didn't specify a valid method in ['value', 'norm']"
                )
        gvs_list.insert(opt_idx, gvs)
    return gvs_list
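# Standalone sketch of the "norm" branch above (toy variables and a made-up clip
# value): compute the global norm, fall back to 1.0 if it is inf/nan, then clip
# with use_norm so a broken norm does not propagate into the update.
import tensorflow as tf

w = tf.get_variable('w_demo', [4, 4])
b = tf.get_variable('b_demo', [4])
loss_demo = tf.reduce_sum(tf.matmul(tf.ones([2, 4]), w) + b)

opt = tf.train.GradientDescentOptimizer(0.1)
gvs = opt.compute_gradients(loss_demo, var_list=[w, b])
gradients, variables = zip(*gvs)

norm = tf.global_norm(gradients)
# Guard against inf/nan norms before clipping.
safe_norm = tf.case(
    [(tf.logical_or(tf.is_inf(norm), tf.is_nan(norm)), lambda: tf.constant(1.0))],
    default=lambda: tf.identity(norm))
clipped, _ = tf.clip_by_global_norm(gradients, 5.0, use_norm=safe_norm)
train_op_demo = opt.apply_gradients(zip(clipped, variables))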
def _local_perm(inputs, is_masked, perm_size, seq_len, leak_ratio):
    """Samples a permutation of the factorization order.

    Creates perm_mask and target_mask accordingly.

    Args:
        inputs: int64 Tensor in shape [seq_len], input ids.
        is_masked: bool Tensor in shape [seq_len]. True means being selected for
            partial prediction.
        perm_size: the length of the longest permutation. Could be set to reuse_len.
            Should not be larger than reuse_len or there will be data leaks.
        seq_len: int, sequence length.
        leak_ratio: float, percent of masked tokens that are leaked.

    Returns:
        perm_mask: float32 Tensor in shape [seq_len, seq_len] consisting of 0 and 1.
            If perm_mask[i][j] == 1, the i-th token (in original order) cannot attend
            to the j-th token (in original order). This happens only when the i-th
            token's permuted position <= the j-th token's permuted position and the
            j-th token is masked or is a functional token. If perm_mask[i][j] == 0,
            the i-th token (in original order) can attend to the j-th token (in
            original order). Note that non-masked tokens can be attended to by all
            other tokens, which differs from the description in the original paper.
        target_mask: float32 Tensor in shape [seq_len] consisting of 0 and 1. If
            target_mask[i] == 1, the i-th token needs to be predicted and a mask
            will be used as input. This token counts towards the loss. If
            target_mask[i] == 0, the token (or [SEP], [CLS]) will be used as input.
            This token does not count towards the loss.
        inputs_k: int64 Tensor in shape [seq_len], input ids.
        inputs_q: float32 Tensor in shape [seq_len], the same as target_mask.
    """
    # Generate permutation indices
    index = tf.range(seq_len, dtype=tf.int64)
    index = tf.transpose(tf.reshape(index, [-1, perm_size]))
    index = tf.random.shuffle(index)
    index = tf.reshape(tf.transpose(index), [-1])

    # non-functional tokens
    non_func_tokens = tf.logical_not(
        tf.logical_or(tf.equal(inputs, SEP_ID), tf.equal(inputs, CLS_ID)))
    masked_tokens = tf.logical_and(is_masked, non_func_tokens)
    non_masked_or_func_tokens = tf.logical_not(masked_tokens)

    smallest_index = -2 * tf.ones([seq_len], dtype=tf.int64)

    # Similar to BERT, randomly leak some masked tokens
    if leak_ratio > 0:
        leak_tokens = tf.logical_and(
            masked_tokens,
            tf.random.uniform([seq_len], maxval=1.0) < leak_ratio)
        can_attend_self = tf.logical_or(non_masked_or_func_tokens, leak_tokens)
    else:
        can_attend_self = non_masked_or_func_tokens
    to_index = tf.where(can_attend_self, smallest_index, index)
    from_index = tf.where(can_attend_self, to_index + 1, to_index)

    # For masked tokens, can attend if i > j
    # For context tokens, always can attend each other
    can_attend = from_index[:, None] > to_index[None, :]

    # In modeling, 1 indicates cannot attend. Hence, reverse the value here.
    perm_mask = 1.0 - tf.cast(can_attend, tf.float32)

    # Only masked tokens are included in the loss
    target_mask = tf.cast(masked_tokens, tf.float32)

    # construct inputs_k
    inputs_k = inputs

    # construct inputs_q
    inputs_q = masked_tokens

    return perm_mask, target_mask, inputs_k, inputs_q
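# Worked toy example (seq_len=8, perm_size=4; the outcome shown is just one possible
# draw) of the index construction at the top of _local_perm: the sequence is cut
# into blocks of perm_size consecutive tokens, and one random permutation of the
# perm_size positions is shared across all blocks.
import tensorflow as tf

seq_len, perm_size = 8, 4
index = tf.range(seq_len, dtype=tf.int64)                  # [0 1 2 3 4 5 6 7]
index = tf.transpose(tf.reshape(index, [-1, perm_size]))   # shape [perm_size, seq_len // perm_size]
index = tf.random.shuffle(index)                           # shuffle along the first axis
index = tf.reshape(tf.transpose(index), [-1])              # back to shape [seq_len]
# One possible outcome: [2 0 3 1 6 4 7 5] -- both 4-token blocks receive the same
# permutation (2, 0, 3, 1) of factorization-order positions.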
def subsample(self, indicator, batch_size, labels, scope=None):
    """Returns subsampled minibatch.

    Args:
        indicator: boolean tensor of shape [N] whose True entries can be sampled.
        batch_size: desired batch size. If None, keeps all positive samples and
            randomly selects negative samples so that the positive sample fraction
            matches self._positive_fraction. It cannot be None if is_static is True.
        labels: boolean tensor of shape [N] denoting positive (=True) and negative
            (=False) examples.
        scope: name scope.

    Returns:
        sampled_idx_indicator: boolean tensor of shape [N], True for entries which
            are sampled.

    Raises:
        ValueError: if labels and indicator are not 1D boolean tensors.
    """
    if len(indicator.get_shape().as_list()) != 1:
        raise ValueError('indicator must be 1 dimensional, got a tensor of '
                         'shape %s' % indicator.get_shape())
    if len(labels.get_shape().as_list()) != 1:
        raise ValueError('labels must be 1 dimensional, got a tensor of '
                         'shape %s' % labels.get_shape())
    if labels.dtype != tf.bool:
        raise ValueError('labels should be of type bool. Received: %s' %
                         labels.dtype)
    if indicator.dtype != tf.bool:
        raise ValueError('indicator should be of type bool. Received: %s' %
                         indicator.dtype)
    with tf.name_scope(scope, 'BalancedPositiveNegativeSampler'):
        if self._is_static:
            return self._static_subsample(indicator, batch_size, labels)
        else:
            # Only sample from indicated samples
            negative_idx = tf.logical_not(labels)
            positive_idx = tf.logical_and(labels, indicator)
            negative_idx = tf.logical_and(negative_idx, indicator)

            # Sample positive and negative samples separately
            if batch_size is None:
                max_num_pos = tf.reduce_sum(tf.to_int32(positive_idx))
            else:
                max_num_pos = int(self._positive_fraction * batch_size)
            sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos)
            num_sampled_pos = tf.reduce_sum(tf.cast(sampled_pos_idx, tf.int32))
            if batch_size is None:
                negative_positive_ratio = (
                    1 - self._positive_fraction) / self._positive_fraction
                max_num_neg = tf.to_int32(
                    negative_positive_ratio * tf.to_float(num_sampled_pos))
            else:
                max_num_neg = batch_size - num_sampled_pos
            sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg)
            return tf.logical_or(sampled_pos_idx, sampled_neg_idx)
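# Toy illustration (values made up) of the indicator logic above: the label mask
# and the sampling indicator are combined into separate positive / negative pools
# before each pool is subsampled.
import tensorflow as tf

labels = tf.constant([True, False, True, False, False])      # positive vs negative
indicator = tf.constant([True, True, False, True, True])     # allowed to be sampled

positive_idx = tf.logical_and(labels, indicator)                   # [T, F, F, F, F]
negative_idx = tf.logical_and(tf.logical_not(labels), indicator)   # [F, T, F, T, T]
# A balanced sampler would now draw up to positive_fraction * batch_size entries
# from positive_idx, fill the rest of the batch from negative_idx, and return the
# union of the two sampled indicators via tf.logical_or.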
def logical_or(self: TensorType, other: TensorOrScalar) -> TensorType:
    assert_bool(self)
    assert_bool(other)
    return type(self)(tf.logical_or(self.raw, unwrap1(other)))
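# Quick standalone reference (not tied to the wrapper class above): the underlying
# tf.logical_or works elementwise on bool tensors and supports broadcasting.
import tensorflow as tf

a = tf.constant([True, False, False])
b = tf.constant(False)
c = tf.logical_or(a, b)   # [True, False, False]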
def build_graph_with_losses(self, FLAGS, batch_data, training=True,
                            summary=False, reuse=False):
    if FLAGS.guided:
        batch_data, edge = batch_data
        edge = edge[:, :, :, 0:1] / 255.
        edge = tf.cast(edge > FLAGS.edge_threshold, tf.float32)
    batch_pos = batch_data / 127.5 - 1.
    # generate mask, 1 represents masked point
    bbox = random_bbox(FLAGS)
    regular_mask = bbox2mask(FLAGS, bbox, name='mask_c')
    irregular_mask = brush_stroke_mask(FLAGS, name='mask_c')
    mask = tf.cast(
        tf.logical_or(
            tf.cast(irregular_mask, tf.bool),
            tf.cast(regular_mask, tf.bool),
        ),
        tf.float32)

    batch_incomplete = batch_pos * (1. - mask)
    if FLAGS.guided:
        edge = edge * mask
        xin = tf.concat([batch_incomplete, edge], axis=3)
    else:
        xin = batch_incomplete
    x1, x2, offset_flow = self.build_inpaint_net(
        xin, mask, reuse=reuse, training=training, padding=FLAGS.padding)
    batch_predicted = x2
    losses = {}
    # apply mask and complete image
    batch_complete = batch_predicted * mask + batch_incomplete * (1. - mask)
    # local patches
    losses['ae_loss'] = FLAGS.l1_loss_alpha * tf.reduce_mean(
        tf.abs(batch_pos - x1))
    losses['ae_loss'] += FLAGS.l1_loss_alpha * tf.reduce_mean(
        tf.abs(batch_pos - x2))
    if summary:
        scalar_summary('losses/ae_loss', losses['ae_loss'])
        if FLAGS.guided:
            viz_img = [batch_pos, batch_incomplete + edge, batch_complete]
        else:
            viz_img = [batch_pos, batch_incomplete, batch_complete]
        if offset_flow is not None:
            viz_img.append(
                resize(offset_flow, scale=4, func=tf.image.resize_bilinear))
        images_summary(
            tf.concat(viz_img, axis=2),
            'raw_incomplete_predicted_complete', FLAGS.viz_max_out)

    # gan
    batch_pos_neg = tf.concat([batch_pos, batch_complete], axis=0)
    if FLAGS.gan_with_mask:
        batch_pos_neg = tf.concat([
            batch_pos_neg,
            tf.tile(mask, [FLAGS.batch_size * 2, 1, 1, 1])
        ], axis=3)
    if FLAGS.guided:
        # conditional GANs
        batch_pos_neg = tf.concat(
            [batch_pos_neg, tf.tile(edge, [2, 1, 1, 1])], axis=3)
    # wgan with gradient penalty
    if FLAGS.gan == 'sngan':
        pos_neg = self.build_gan_discriminator(
            batch_pos_neg, training=training, reuse=reuse)
        pos, neg = tf.split(pos_neg, 2)
        g_loss, d_loss = gan_hinge_loss(pos, neg)
        losses['g_loss'] = g_loss
        losses['d_loss'] = d_loss
    else:
        raise NotImplementedError('{} not implemented.'.format(FLAGS.gan))
    if summary:
        # summary the magnitude of gradients from different losses w.r.t. predicted image
        gradients_summary(losses['g_loss'], batch_predicted, name='g_loss')
        gradients_summary(losses['g_loss'], x2, name='g_loss_to_x2')
        # gradients_summary(losses['ae_loss'], x1, name='ae_loss_to_x1')
        gradients_summary(losses['ae_loss'], x2, name='ae_loss_to_x2')
    losses['g_loss'] = FLAGS.gan_loss_alpha * losses['g_loss']
    if FLAGS.ae_loss:
        losses['g_loss'] += losses['ae_loss']
    g_vars = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, 'inpaint_net')
    d_vars = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, 'discriminator')
    return g_vars, d_vars, losses
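# Minimal standalone sketch of the mask composition above. The repository's
# random_bbox / bbox2mask / brush_stroke_mask helpers are replaced here by
# hand-made toy masks; only the logical-or composition and the masking of the
# input image follow the code above.
import tensorflow as tf

batch_pos = tf.random_uniform([1, 8, 8, 3], minval=-1., maxval=1.)

# Toy "regular" (box) mask and "irregular" mask, both [1, H, W, 1] with 1 = hole.
regular_mask = tf.pad(tf.ones([1, 4, 4, 1]), [[0, 0], [2, 2], [2, 2], [0, 0]])
irregular_mask = tf.cast(tf.random_uniform([1, 8, 8, 1]) > 0.9, tf.float32)

mask = tf.cast(
    tf.logical_or(tf.cast(irregular_mask, tf.bool),
                  tf.cast(regular_mask, tf.bool)),
    tf.float32)
batch_incomplete = batch_pos * (1. - mask)   # masked pixels are zeroed out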