def to_binary_tf(bar_or_track_bar, threshold=0.0, track_mode=False, melody=False): """Return the binarized tensor of the input tensor (be careful of the channel order!)""" if track_mode: # melody track if melody: melody_is_max = tf.equal(bar_or_track_bar, tf.reduce_max(bar_or_track_bar, axis=2, keep_dims=True)) melody_pass_threshold = (bar_or_track_bar > threshold) out_tensor = tf.logical_and(melody_is_max, melody_pass_threshold) # non-melody track else: out_tensor = (bar_or_track_bar > threshold) return out_tensor else: if len(bar_or_track_bar.get_shape()) == 4: melody_track = tf.slice(bar_or_track_bar, [0, 0, 0, 0], [-1, -1, -1, 1]) other_tracks = tf.slice(bar_or_track_bar, [0, 0, 0, 1], [-1, -1, -1, -1]) elif len(bar_or_track_bar.get_shape()) == 5: melody_track = tf.slice(bar_or_track_bar, [0, 0, 0, 0, 0], [-1, -1, -1, -1, 1]) other_tracks = tf.slice(bar_or_track_bar, [0, 0, 0, 0, 1], [-1, -1, -1, -1, -1]) # melody track melody_is_max = tf.equal(melody_track, tf.reduce_max(melody_track, axis=2, keep_dims=True)) melody_pass_threshold = (melody_track > threshold) out_tensor_melody = tf.logical_and(melody_is_max, melody_pass_threshold) # other tracks out_tensor_others = (other_tracks > threshold) if len(bar_or_track_bar.get_shape()) == 4: return tf.concat([out_tensor_melody, out_tensor_others], 3) elif len(bar_or_track_bar.get_shape()) == 5: return tf.concat([out_tensor_melody, out_tensor_others], 4)
def check_convergence(self, new_T0, new_transition, new_emission): delta_T0 = tf.reduce_max(tf.abs(self.T0 - new_T0)) < self.epsilon delta_T = tf.reduce_max(tf.abs(self.T - new_transition)) < self.epsilon delta_E = tf.reduce_max(tf.abs(self.E - new_emission)) < self.epsilon return tf.logical_and(tf.logical_and(delta_T0, delta_T), delta_E)
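# Usage sketch for check_convergence (an assumption, not from the source): the method
# compares old vs. updated HMM parameters, so a SimpleNamespace stands in for `self`
# and the toy tensors below are purely illustrative. Runs eagerly under TF 2.x.
import types
import tensorflow as tf

hmm = types.SimpleNamespace(
    T0=tf.constant([0.5, 0.5]),
    T=tf.constant([[0.9, 0.1], [0.2, 0.8]]),
    E=tf.constant([[0.7, 0.3], [0.4, 0.6]]),
    epsilon=1e-4)
# True only if every parameter moved by less than epsilon.
print(check_convergence(hmm, hmm.T0, hmm.T + 1e-6, hmm.E).numpy())  # True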
def m_body(i, ta_tp, ta_fp, gmatch, n_ignored_det): # Jaccard score with groundtruth bboxes. rbbox = bboxes[i, :] # rbbox = tf.Print(rbbox, [rbbox]) jaccard = bboxes_jaccard(rbbox, gxs, gys) # Best fit, checking it's above threshold. idxmax = tf.cast(tf.argmax(jaccard, axis=0), dtype=tf.int32) jcdmax = jaccard[idxmax] match = jcdmax > matching_threshold existing_match = gmatch[idxmax] not_ignored = tf.logical_not(gignored[idxmax]) n_ignored_det = n_ignored_det + tf.cast(gignored[idxmax], tf.int32) # TP: match & no previous match and FP: previous match | no match. # If ignored: no record, i.e. FP=False and TP=False. tp = tf.logical_and(not_ignored, tf.logical_and(match, tf.logical_not(existing_match))) ta_tp = ta_tp.write(i, tp) fp = tf.logical_and(not_ignored, tf.logical_or(existing_match, tf.logical_not(match))) ta_fp = ta_fp.write(i, fp) # Update groundtruth match. mask = tf.logical_and(tf.equal(grange, idxmax), tf.logical_and(not_ignored, match)) gmatch = tf.logical_or(gmatch, mask) return [i+1, ta_tp, ta_fp, gmatch, n_ignored_det]
def m_body(i, ta_tp, ta_fp, gmatch): # Jaccard score with groundtruth bboxes. rbbox = bboxes[i] jaccard = bboxes_jaccard(rbbox, gbboxes) jaccard = jaccard * tf.cast(tf.equal(glabels, rlabel), dtype=jaccard.dtype) # Best fit, checking it's above threshold. idxmax = tf.cast(tf.argmax(jaccard, axis=0), tf.int32) jcdmax = jaccard[idxmax] match = jcdmax > matching_threshold existing_match = gmatch[idxmax] not_difficult = tf.logical_not(gdifficults[idxmax]) # TP: match & no previous match and FP: previous match | no match. # If difficult: no record, i.e. FP=False and TP=False. tp = tf.logical_and(not_difficult, tf.logical_and(match, tf.logical_not(existing_match))) ta_tp = ta_tp.write(i, tp) fp = tf.logical_and(not_difficult, tf.logical_or(existing_match, tf.logical_not(match))) ta_fp = ta_fp.write(i, fp) # Update groundtruth match. mask = tf.logical_and(tf.equal(grange, idxmax), tf.logical_and(not_difficult, match)) gmatch = tf.logical_or(gmatch, mask) return [i+1, ta_tp, ta_fp, gmatch]
def prune_outside_window(keypoints, window, scope=None): """Prunes keypoints that fall outside a given window. This function replaces keypoints that fall outside the given window with nan. See also clip_to_window which clips any keypoints that fall outside the given window. Args: keypoints: a tensor of shape [num_instances, num_keypoints, 2] window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max] window outside of which the op should prune the keypoints. scope: name scope. Returns: new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] """ with tf.name_scope(scope, 'PruneOutsideWindow'): y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2) win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) valid_indices = tf.logical_and( tf.logical_and(y >= win_y_min, y <= win_y_max), tf.logical_and(x >= win_x_min, x <= win_x_max)) new_y = tf.where(valid_indices, y, np.nan * tf.ones_like(y)) new_x = tf.where(valid_indices, x, np.nan * tf.ones_like(x)) new_keypoints = tf.concat([new_y, new_x], 2) return new_keypoints
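# Usage sketch for prune_outside_window with made-up keypoint values. The two-argument
# tf.name_scope form above is TF1-style, so this sketch assumes graph mode via tf.compat.v1.
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

keypoints = tf.constant([[[0.2, 0.3], [0.9, 0.9]]], dtype=tf.float32)  # [1, 2, 2]
window = tf.constant([0.0, 0.0, 0.5, 0.5], dtype=tf.float32)  # [y_min, x_min, y_max, x_max]
pruned = prune_outside_window(keypoints, window)
with tf.Session() as sess:
    print(sess.run(pruned))  # the out-of-window keypoint comes back as [nan, nan]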
def getRpRnTpTnForTrain0OrVal1(self, y, training0OrValidation1): # The returned list has (numberOfClasses)x4 integers: >numberOfRealPositives, numberOfRealNegatives, numberOfTruePredictedPositives, numberOfTruePredictedNegatives< for each class (incl background). # Order in the list is the natural order of the classes (ie class-0 RP,RN,TPP,TPN, class-1 RP,RN,TPP,TPN, class-2 RP,RN,TPP,TPN ...) # param y: y = T.itensor4('y'). Dimensions [batchSize, r, c, z] yPredToUse = self.y_pred_train if training0OrValidation1 == 0 else self.y_pred_val returnedListWithNumberOfRpRnTpTnForEachClass = [] for class_i in range(0, self._numberOfOutputClasses) : #Number of Real Positive, Real Negatives, True Predicted Positives and True Predicted Negatives are reported PER CLASS (first for WHOLE). tensorOneAtRealPos = tf.equal(y, class_i) tensorOneAtRealNeg = tf.logical_not(tensorOneAtRealPos) tensorOneAtPredictedPos = tf.equal(yPredToUse, class_i) tensorOneAtPredictedNeg = tf.logical_not(tensorOneAtPredictedPos) tensorOneAtTruePos = tf.logical_and(tensorOneAtRealPos,tensorOneAtPredictedPos) tensorOneAtTrueNeg = tf.logical_and(tensorOneAtRealNeg,tensorOneAtPredictedNeg) returnedListWithNumberOfRpRnTpTnForEachClass.append( tf.reduce_sum( tf.cast(tensorOneAtRealPos, dtype="int32")) ) returnedListWithNumberOfRpRnTpTnForEachClass.append( tf.reduce_sum( tf.cast(tensorOneAtRealNeg, dtype="int32")) ) returnedListWithNumberOfRpRnTpTnForEachClass.append( tf.reduce_sum( tf.cast(tensorOneAtTruePos, dtype="int32")) ) returnedListWithNumberOfRpRnTpTnForEachClass.append( tf.reduce_sum( tf.cast(tensorOneAtTrueNeg, dtype="int32")) ) return returnedListWithNumberOfRpRnTpTnForEachClass
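# Usage sketch for getRpRnTpTnForTrain0OrVal1 (assumed toy data; a SimpleNamespace mimics
# the attributes the method reads from `self`). Runs eagerly under TF 2.x.
import types
import tensorflow as tf

y_true = tf.constant([[[[0, 1], [1, 0]]]])  # [batchSize, r, c, z]
y_pred = tf.constant([[[[0, 1], [0, 0]]]])
stub = types.SimpleNamespace(y_pred_train=y_pred, y_pred_val=y_pred, _numberOfOutputClasses=2)
counts = getRpRnTpTnForTrain0OrVal1(stub, y_true, 0)
print([c.numpy() for c in counts])  # [RP, RN, TP, TN] for class 0, then for class 1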
def build_graph(self, nn_im_w, nn_im_h, num_colour_channels=3, weights=None, biases=None): num_outputs = 1 #ofc self.nn_im_w = nn_im_w self.nn_im_h = nn_im_h if weights is None: weights = [None, None, None, None, None] if biases is None: biases = [None, None, None, None, None] with tf.device('/cpu:0'): # Placeholder variables for the input image and output images self.x = tf.placeholder(tf.float32, shape=[None, nn_im_w*nn_im_h*3]) self.y_ = tf.placeholder(tf.float32, shape=[None, num_outputs]) self.threshold = tf.placeholder(tf.float32) # Build the convolutional and pooling layers conv1_output_channels = 32 conv2_output_channels = 16 conv3_output_channels = 8 conv_layer_1_input = tf.reshape(self.x, [-1, nn_im_h, nn_im_w, num_colour_channels]) #The resized input image self.build_conv_layer(conv_layer_1_input, num_colour_channels, conv1_output_channels, initial_weights=weights[0], initial_biases=biases[0]) # layer 1 self.build_conv_layer(self.layers[0][0], conv1_output_channels, conv2_output_channels, initial_weights=weights[1], initial_biases=biases[1])# layer 2 self.build_conv_layer(self.layers[1][0], conv2_output_channels, conv3_output_channels, initial_weights=weights[2], initial_biases=biases[2])# layer 3 # Build the fully connected layer convnet_output_w = nn_im_w//8 convnet_output_h = nn_im_h//8 fully_connected_layer_input = tf.reshape(self.layers[2][0], [-1, convnet_output_w * convnet_output_h * conv3_output_channels]) self.build_fully_connected_layer(fully_connected_layer_input, convnet_output_w, convnet_output_h, conv3_output_channels, initial_weights=weights[3], initial_biases=biases[3]) # The dropout stage and readout layer self.keep_prob, self.h_drop = self.dropout(self.layers[3][0]) self.y_conv,_,_ = self.build_readout_layer(self.h_drop, num_outputs, initial_weights=weights[4], initial_biases=biases[4]) self.mean_error = tf.sqrt(tf.reduce_mean(tf.square(self.y_ - self.y_conv))) self.train_step = tf.train.AdamOptimizer(1e-4).minimize(self.mean_error) self.accuracy = (1.0 - tf.reduce_mean(tf.abs(self.y_ - tf.round(self.y_conv)))) positive_examples = tf.greater_equal(self.y_, 0.5) negative_examples = tf.logical_not(positive_examples) positive_classifications = tf.greater_equal(self.y_conv, self.threshold) negative_classifications = tf.logical_not(positive_classifications) self.true_positive = tf.reduce_sum(tf.cast(tf.logical_and(positive_examples, positive_classifications),tf.int32)) # count the examples that are positive and classified as positive self.false_positive = tf.reduce_sum(tf.cast(tf.logical_and(negative_examples, positive_classifications),tf.int32)) # count the examples that are negative but classified as positive self.true_negative = tf.reduce_sum(tf.cast(tf.logical_and(negative_examples, negative_classifications),tf.int32)) # count the examples that are negative and classified as negative self.false_negative = tf.reduce_sum(tf.cast(tf.logical_and(positive_examples, negative_classifications),tf.int32)) # count the examples that are positive but classified as negative self.positive_count = tf.reduce_sum(tf.cast(positive_examples, tf.int32)) # count the examples that are positive self.negative_count = tf.reduce_sum(tf.cast(negative_examples, tf.int32)) # count the examples that are negative self.confusion_matrix = tf.reshape(tf.pack([self.true_positive, self.false_positive, self.false_negative, self.true_negative]), [2,2]) self.sess.run(tf.initialize_all_variables())
def subsample(self, indicator, batch_size, labels, scope=None): """Returns subsampled minibatch. Args: indicator: boolean tensor of shape [N] whose True entries can be sampled. batch_size: desired batch size. If None, keeps all positive samples and randomly selects negative samples so that the positive sample fraction matches self._positive_fraction. It cannot be None if is_static is True. labels: boolean tensor of shape [N] denoting positive(=True) and negative (=False) examples. scope: name scope. Returns: sampled_idx_indicator: boolean tensor of shape [N], True for entries which are sampled. Raises: ValueError: if labels and indicator are not 1D boolean tensors. """ if len(indicator.get_shape().as_list()) != 1: raise ValueError('indicator must be 1 dimensional, got a tensor of ' 'shape %s' % indicator.get_shape()) if len(labels.get_shape().as_list()) != 1: raise ValueError('labels must be 1 dimensional, got a tensor of ' 'shape %s' % labels.get_shape()) if labels.dtype != tf.bool: raise ValueError('labels should be of type bool. Received: %s' % labels.dtype) if indicator.dtype != tf.bool: raise ValueError('indicator should be of type bool. Received: %s' % indicator.dtype) with tf.name_scope(scope, 'BalancedPositiveNegativeSampler'): if self._is_static: return self._static_subsample(indicator, batch_size, labels) else: # Only sample from indicated samples negative_idx = tf.logical_not(labels) positive_idx = tf.logical_and(labels, indicator) negative_idx = tf.logical_and(negative_idx, indicator) # Sample positive and negative samples separately if batch_size is None: max_num_pos = tf.reduce_sum(tf.to_int32(positive_idx)) else: max_num_pos = int(self._positive_fraction * batch_size) sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos) num_sampled_pos = tf.reduce_sum(tf.cast(sampled_pos_idx, tf.int32)) if batch_size is None: negative_positive_ratio = ( 1 - self._positive_fraction) / self._positive_fraction max_num_neg = tf.to_int32( negative_positive_ratio * tf.to_float(num_sampled_pos)) else: max_num_neg = batch_size - num_sampled_pos sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg) return tf.logical_or(sampled_pos_idx, sampled_neg_idx)
def tf_F1_score(actuals, predictions): actuals = tf.reshape(actuals, [-1, 1]) predictions = tf.reshape(predictions, [-1, 1]) ones_like_actuals = tf.ones_like(actuals) zeros_like_actuals = tf.zeros_like(actuals) ones_like_predictions = tf.ones_like(predictions) zeros_like_predictions = tf.zeros_like(predictions) # true positive tp_op = tf.reduce_sum( tf.cast( tf.logical_and( tf.equal(actuals, ones_like_actuals), tf.equal(predictions, ones_like_predictions) ), dtype=tf.float32 ) ) # true negative tn_op = tf.reduce_sum( tf.cast( tf.logical_and( tf.equal(actuals, zeros_like_actuals), tf.equal(predictions, zeros_like_predictions) ), dtype=tf.float32 ) ) # false positive fp_op = tf.reduce_sum( tf.cast( tf.logical_and( tf.equal(actuals, zeros_like_actuals), tf.equal(predictions, ones_like_predictions) ), dtype=tf.float32 ) ) # false negative fn_op = tf.reduce_sum( tf.cast( tf.logical_and( tf.equal(actuals, ones_like_actuals), tf.equal(predictions, zeros_like_predictions) ), dtype=tf.float32 ) ) accuracy = (tp_op + tn_op) / (tp_op + tn_op + fp_op + fn_op) precision = tp_op / (tp_op + fp_op) recall = tp_op / (tp_op + fn_op) f1_score = (2 * (precision * recall)) / (precision + recall) return accuracy, [tp_op, tn_op, fp_op, fn_op, f1_score]
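# Quick eager-mode check of tf_F1_score with made-up binary labels (assumes TF 2.x eager).
import tensorflow as tf

actuals = tf.constant([1.0, 0.0, 1.0, 1.0])
predictions = tf.constant([1.0, 0.0, 0.0, 1.0])
accuracy, (tp, tn, fp, fn, f1) = tf_F1_score(actuals, predictions)
print(accuracy.numpy(), f1.numpy())  # 0.75 accuracy, 0.8 F1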
def sensitivity(logits, labels): predictions = tf.argmax(logits, axis=-1) actuals = tf.argmax(labels, axis=-1) nodule_actuals = tf.ones_like(actuals) non_nodule_actuals = tf.zeros_like(actuals) nodule_predictions = tf.ones_like(predictions) non_nodule_predictions = tf.zeros_like(predictions) tp_op = tf.reduce_sum( tf.cast( tf.logical_and( tf.equal(actuals, nodule_actuals), tf.equal(predictions, nodule_predictions) ), tf.float32 ) ) tn_op = tf.reduce_sum( tf.cast( tf.logical_and( tf.equal(actuals, non_nodule_actuals), tf.equal(predictions, non_nodule_predictions) ), tf.float32 ) ) fp_op = tf.reduce_sum( tf.cast( tf.logical_and( tf.equal(actuals, non_nodule_actuals), tf.equal(predictions, nodule_predictions) ), tf.float32 ) ) fn_op = tf.reduce_sum( tf.cast( tf.logical_and( tf.equal(actuals, nodule_actuals), tf.equal(predictions, non_nodule_predictions) ), tf.float32 ) ) false_positive_rate = fp_op / (fp_op + tn_op) recall = tp_op / (tp_op + fn_op) return recall, false_positive_rate
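# Quick eager-mode check of sensitivity with one-hot toy data, class 1 = nodule (assumes TF 2.x).
import tensorflow as tf

logits = tf.constant([[0.2, 0.8], [0.9, 0.1], [0.6, 0.4]])
labels = tf.constant([[0.0, 1.0], [0.0, 1.0], [1.0, 0.0]])
recall, false_positive_rate = sensitivity(logits, labels)
print(recall.numpy(), false_positive_rate.numpy())  # 0.5 recall, 0.0 FPR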
def do_eval(self, f1 = False): """INTENT : Evaluate the CDBN as a classifier""" input_placeholder = tf.placeholder(tf.float32, shape=self.input) eval = tf.reshape(self._get_input_level(self.number_layer,input_placeholder), [self.batch_size, -1]) y = tf.nn.softmax(tf.matmul(eval, self.W) + self.b) y_ = tf.placeholder(tf.float32, [None,self.output_classes]) if f1: predicted_class = tf.argmax(y,1) real_class = tf.argmax(y_,1) zeros = tf.zeros_like(predicted_class) ones = tf.ones_like(predicted_class) true_positive = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(predicted_class, ones),tf.equal(real_class, ones)), tf.float32)) tp_count = 0 false_positive = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(predicted_class, ones),tf.equal(real_class, zeros)), tf.float32)) fp_count = 0 true_negative = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(predicted_class, zeros),tf.equal(real_class, zeros)), tf.float32)) tn_count = 0 false_negative = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(predicted_class, zeros),tf.equal(real_class, ones)), tf.float32)) fn_count = 0 else: correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) correct_count = tf.reduce_sum(tf.cast(correct_prediction, tf.float32)) true_count = 0 num_examples = self.data.num_test_example steps_per_epoch = num_examples // self.batch_size for step in range(steps_per_epoch): images_feed, labels_feed = self.data.next_batch(self.batch_size, 'test') visible = np.reshape(images_feed, self.input) if f1: a,b,c,d = self.session.run([true_positive,false_positive,true_negative,false_negative], feed_dict={input_placeholder: visible, y_: labels_feed}) tp_count += a fp_count += b tn_count += c fn_count += d else: true_count += self.session.run(correct_count, feed_dict={input_placeholder: visible, y_: labels_feed}) if self.verbosity > 0: print('--------------------------') if f1: precision = tp_count / (tp_count+fp_count) recall = tp_count / (tp_count+fn_count) f1_score = 2*precision*recall/(precision+recall) overall_precision = (tp_count + tn_count) / (fn_count+ fp_count + tp_count +tn_count) print('Successfully evaluated the CDBN : \n Precision is %0.02f percent \n Recall is %0.02f percent \n F1 score is %0.02f\n tp: %d --- fp: %d --- tn: %d --- fn: %d\n Overall precision is %0.02f percent' %(precision*100, recall*100, f1_score, tp_count, fp_count, tn_count, fn_count, overall_precision * 100)) else: precision = true_count / num_examples print('Successfully evaluated the CDBN : \n %d examples are correctly classified out of %d total examples\n Precision is %0.02f percent' %(true_count, num_examples, precision*100))
def detection_loss(location, confidence, refine_ph, classes_ph, pos_mask): neg_mask = tf.logical_not(pos_mask) number_of_positives = tf.reduce_sum(tf.to_int32(pos_mask)) true_number_of_negatives = tf.minimum(3 * number_of_positives, tf.shape(pos_mask)[1] - number_of_positives) # max is to avoid the case where no positive boxes were sampled number_of_negatives = tf.maximum(1, true_number_of_negatives) num_pos_float = tf.to_float(tf.maximum(1, number_of_positives)) normalizer = tf.to_float(tf.add(number_of_positives, number_of_negatives)) tf.summary.scalar('batch/size', normalizer) cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=confidence, labels=classes_ph) pos_class_loss = tf.reduce_sum(tf.boolean_mask(cross_entropy, pos_mask)) tf.summary.scalar('loss/class_pos', pos_class_loss / num_pos_float) top_k_worst, top_k_inds = tf.nn.top_k(tf.boolean_mask(cross_entropy, neg_mask), number_of_negatives) # multiplication is to avoid the case where no positive boxes were sampled neg_class_loss = tf.reduce_sum(top_k_worst) * \ tf.cast(tf.greater(true_number_of_negatives, 0), tf.float32) class_loss = (neg_class_loss + pos_class_loss) / num_pos_float tf.summary.scalar('loss/class_neg', neg_class_loss / tf.to_float(number_of_negatives)) tf.summary.scalar('loss/class', class_loss) # cond is to avoid the case where no positive boxes were sampled bbox_loss = tf.cond(tf.equal(tf.reduce_sum(tf.cast(pos_mask, tf.int32)), 0), lambda: 0.0, lambda: tf.reduce_mean(smooth_l1(tf.boolean_mask(location, pos_mask), tf.boolean_mask(refine_ph, pos_mask)))) tf.summary.scalar('loss/bbox', bbox_loss) inferred_class = tf.cast(tf.argmax(confidence, 2), tf.int32) positive_matches = tf.equal(tf.boolean_mask(inferred_class, pos_mask), tf.boolean_mask(classes_ph, pos_mask)) hard_matches = tf.equal(tf.boolean_mask(inferred_class, neg_mask), tf.boolean_mask(classes_ph, neg_mask)) hard_matches = tf.gather(hard_matches, top_k_inds) train_acc = ((tf.reduce_sum(tf.to_float(positive_matches)) + tf.reduce_sum(tf.to_float(hard_matches))) / normalizer) tf.summary.scalar('accuracy/train', train_acc) recognized_class = tf.argmax(confidence, 2) tp = tf.reduce_sum(tf.to_float(tf.logical_and(recognized_class > 0, pos_mask))) fp = tf.reduce_sum(tf.to_float(tf.logical_and(recognized_class > 0, neg_mask))) fn = tf.reduce_sum(tf.to_float(tf.logical_and(tf.equal(recognized_class, 0), pos_mask))) precision = tp / (tp + fp) recall = tp / (tp + fn) f1 = 2*(precision * recall)/(precision + recall) tf.summary.scalar('metrics/train/precision', precision) tf.summary.scalar('metrics/train/recall', recall) tf.summary.scalar('metrics/train/f1', f1) return class_loss, bbox_loss, train_acc, number_of_positives
def _noisy_identity_kernel_initializer(shape, dtype=tf.float32, partition_info=None): """Constructs a noisy identity kernel. Args: shape: List of integers. Represents shape of result. dtype: data type for values in result. partition_info: Partition information for initializer functions. Ignored. Returns: Tensor of desired shape and dtype such that applying it as a convolution kernel results in a noisy near-identity operation. Raises: ValueError: If shape does not define a valid kernel. If filter width and height differ. If filter width and height are not odd numbers. If number of input and output channels are not multiples of base_num_channels. """ if len(shape) != 4: raise ValueError("Convolution kernels must be rank 4.") filter_height, filter_width, in_channels, out_channels = shape if filter_width != filter_height: raise ValueError( "Noisy identity initializer only works for square filters.") if filter_width % 2 != 1: raise ValueError( "Noisy identity initializer requires filters have odd height and " "width.") if (in_channels % base_num_channels != 0 or out_channels % base_num_channels != 0): raise ValueError("in_channels and out_channels must both be multiples of " "base_num_channels.") middle_pixel = filter_height // 2 is_middle_pixel = tf.logical_and( tf.equal(_range_along_dimension(0, shape), middle_pixel), tf.equal(_range_along_dimension(1, shape), middle_pixel)) is_same_channel_multiple = tf.equal( tf.floordiv( _range_along_dimension(2, shape) * base_num_channels, in_channels), tf.floordiv( _range_along_dimension(3, shape) * base_num_channels, out_channels)) noise = tf.truncated_normal(shape, stddev=stddev, dtype=dtype) return tf.where( tf.logical_and(is_same_channel_multiple, is_middle_pixel), tf.ones( shape, dtype=dtype) * (base_num_channels / out_channels), noise)
def accuracy(logits, labels): def tf_count(t, val): elements_equal_to_value = tf.equal(t, val) as_ints = tf.cast(elements_equal_to_value, tf.int32) count = tf.reduce_sum(as_ints) return count labels = tf.cast(labels, tf.int64) label_shape = labels.get_shape().as_list() reshaped_labels = tf.reshape(labels, [label_shape[0]*label_shape[1]*label_shape[2]]) logits_shape = logits.get_shape().as_list() reshaped_logits = tf.reshape(logits, [logits_shape[0]*logits_shape[1]*logits_shape[2], logits_shape[3]]) predictions = tf.argmax(reshaped_logits, dimension=1) shaped_predictions = tf.argmax(logits, dimension=3) correct_predictions = tf.equal(predictions, reshaped_labels) accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name='accuracy') tf.add_to_collection('accuracy', accuracy) tf.histogram_summary('predictions_hist', predictions) imgs_to_summarize = tf.expand_dims(tf.cast(shaped_predictions, 'float32'), -1) tf.image_summary('predictions', imgs_to_summarize) cat_names = CLASSES precision = [] cat_acc = [] for cat_id,cat in enumerate(cat_names): cat_pred = tf.equal(predictions, cat_id, name=cat+"_pred") cat_truth = tf.equal(reshaped_labels, cat_id, name=cat+"_truth") non_cat_truth = tf.not_equal(reshaped_labels, cat_id, name=cat+"_non_truth") tp = tf.logical_and(cat_pred, cat_truth, name=cat+"_tp") tp_count = tf.reduce_sum(tf.cast(tp, "float"), name=cat+"_tp_count") fp = tf.logical_and(cat_pred, non_cat_truth, name=cat+"_fp") fp_count = tf.reduce_sum(tf.cast(fp, "float"), name=cat+"_fp_count") tf.scalar_summary('cat_precisions/'+cat+'_fp_count', fp_count) tf.scalar_summary('cat_precisions/'+cat+'_tp_count', tp_count) precision.append( tp_count / (tp_count + fp_count) ) cat_correct = tf.logical_and(cat_truth, cat_pred, name=cat+"_correct") cat_acc.append(tf.reduce_mean(tf.cast(cat_correct, "float"), name=cat+"_accuracy")) precisions = tf.pack(precision) accuracies = tf.pack(cat_acc) tf.add_to_collection('precisions',precisions) return accuracy, precisions, accuracies
def _define_summaries(): """Reset the average score and duration, and return them as summary. Returns: Summary string. """ score_summary = tf.cond( tf.logical_and(log, tf.cast(mean_score.count, tf.bool)), lambda: tf.summary.scalar('mean_score', mean_score.clear()), str) length_summary = tf.cond( tf.logical_and(log, tf.cast(mean_length.count, tf.bool)), lambda: tf.summary.scalar('mean_length', mean_length.clear()), str) return tf.summary.merge([score_summary, length_summary])
def _get_filled_box_idx(idx, top_left, bot_right): """Fill a box with top left and bottom right coordinates.""" # [B, T, H, W] idx_y = idx[:, :, :, :, 0] idx_x = idx[:, :, :, :, 1] top_left_y = tf.expand_dims(tf.expand_dims(top_left[:, :, 0], 2), 3) top_left_x = tf.expand_dims(tf.expand_dims(top_left[:, :, 1], 2), 3) bot_right_y = tf.expand_dims(tf.expand_dims(bot_right[:, :, 0], 2), 3) bot_right_x = tf.expand_dims(tf.expand_dims(bot_right[:, :, 1], 2), 3) lower = tf.logical_and(idx_y >= top_left_y, idx_x >= top_left_x) upper = tf.logical_and(idx_y <= bot_right_y, idx_x <= bot_right_x) box = tf.to_float(tf.logical_and(lower, upper)) return box
def iou(x_true, y_true, w_true, h_true, x_pred, y_pred, w_pred, h_pred, t, pred_confid_tf): x_true = K.expand_dims(x_true, 2) y_true = K.expand_dims(y_true, 2) w_true = K.expand_dims(w_true, 2) h_true = K.expand_dims(h_true, 2) x_pred = K.expand_dims(x_pred, 2) y_pred = K.expand_dims(y_pred, 2) w_pred = K.expand_dims(w_pred, 2) h_pred = K.expand_dims(h_pred, 2) xoffset = K.expand_dims(tf.convert_to_tensor(np.asarray([0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7], dtype=np.float32)),1) yoffset = K.expand_dims(tf.convert_to_tensor(np.asarray([0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4], dtype=np.float32)),1) # xoffset = K.cast_to_floatx((np.tile(np.arange(side),side))) # yoffset = K.cast_to_floatx((np.repeat(np.arange(side),side))) x = tf.where(t, x_pred, K.zeros_like(x_pred)) y = tf.where(t, y_pred, K.zeros_like(y_pred)) w = tf.where(t, w_pred, K.zeros_like(w_pred)) h = tf.where(t, h_pred, K.zeros_like(h_pred)) ow = overlap(x + xoffset, w * 256., x_true + xoffset, w_true * 256.) oh = overlap(y + yoffset, h * 160., y_true + yoffset, h_true * 256.) ow = tf.where(K.greater(ow, 0), ow, K.zeros_like(ow)) oh = tf.where(K.greater(oh, 0), oh, K.zeros_like(oh)) intersection = ow * oh union = w * 256. * h * 160. + w_true * 256. * h_true * 160. - intersection + K.epsilon() # prevent div 0 # # find best iou among bboxes # iouall shape=(-1, bnum*gridcells) iouall = intersection / union obj_count = K.sum(tf.where(t, K.ones_like(x_true), K.zeros_like(x_true))) ave_iou = K.sum(iouall) / (obj_count + 0.0000001) recall_t = K.greater(iouall, 0.5) # recall_count = K.sum(tf.select(recall_t, K.ones_like(iouall), K.zeros_like(iouall))) fid_t = K.greater(pred_confid_tf, 0.3) recall_count_all = K.sum(tf.where(fid_t, K.ones_like(iouall), K.zeros_like(iouall))) # obj_fid_t = tf.logical_and(fid_t, t) obj_fid_t = tf.logical_and(fid_t, recall_t) effective_iou_count = K.sum(tf.where(obj_fid_t, K.ones_like(iouall), K.zeros_like(iouall))) recall = effective_iou_count / (obj_count + 0.00000001) precision = effective_iou_count / (recall_count_all + 0.0000001) return ave_iou, recall, precision, obj_count, intersection, union, ow, oh, x, y, w, h
def precision(mod_y, ref_y, summary=True, name="precision"): with tf.name_scope(name): predictions = tf.argmax(mod_y, 1) actuals = tf.argmax(ref_y, 1) ones_likes = tf.ones_like(actuals) zeros_likes = tf.zeros_like(actuals) tp = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(actuals, zeros_likes), tf.equal(predictions, zeros_likes)), tf.float32)) fp = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(actuals, ones_likes), tf.equal(predictions, zeros_likes)), tf.float32)) precision = tf.div(tp, tf.add(tp, fp)) if summary: tf.summary.scalar('precision', precision) return precision
def recall(mod_y, ref_y, summary=True, name="recall"): with tf.name_scope(name): predictions = tf.argmax(mod_y, 1) actuals = tf.argmax(ref_y, 1) ones_likes = tf.ones_like(actuals) zeros_likes = tf.zeros_like(actuals) tp = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(actuals, zeros_likes), tf.equal(predictions, zeros_likes)), tf.float32)) fn = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(actuals, zeros_likes), tf.equal(predictions, ones_likes)), tf.float32)) recall = tf.div(tp, tf.add(tp, fn)) if summary: tf.summary.scalar('recall', recall) return recall
def while_step(t, rnn_state, tas, accs): """Implements one timestep of FIVO computation.""" log_weights_acc, log_p_hat_acc, kl_acc = accs cur_inputs, cur_mask = nested.read_tas([inputs_ta, mask_ta], t) # Run the cell for one step. log_q_z, log_p_z, log_p_x_given_z, kl, new_state = cell( cur_inputs, rnn_state, cur_mask, ) # Compute the incremental weight and use it to update the current # accumulated weight. kl_acc += kl * cur_mask log_alpha = (log_p_x_given_z + log_p_z - log_q_z) * cur_mask log_alpha = tf.reshape(log_alpha, [num_samples, batch_size]) log_weights_acc += log_alpha # Calculate the effective sample size. ess_num = 2 * tf.reduce_logsumexp(log_weights_acc, axis=0) ess_denom = tf.reduce_logsumexp(2 * log_weights_acc, axis=0) log_ess = ess_num - ess_denom # Calculate the ancestor indices via resampling. Because we maintain the # log unnormalized weights, we pass the weights in as logits, allowing # the distribution object to apply a softmax and normalize them. resampling_dist = tf.contrib.distributions.Categorical( logits=tf.transpose(log_weights_acc, perm=[1, 0])) ancestor_inds = tf.stop_gradient( resampling_dist.sample(sample_shape=num_samples, seed=random_seed)) # Because the batch is flattened and laid out as discussed # above, we must modify ancestor_inds to index the proper samples. # The particles in the ith filter are distributed every batch_size rows # in the batch, and offset i rows from the top. So, to correct the indices # we multiply by the batch_size and add the proper offset. Crucially, # when ancestor_inds is flattened the layout of the batch is maintained. offset = tf.expand_dims(tf.range(batch_size), 0) ancestor_inds = tf.reshape(ancestor_inds * batch_size + offset, [-1]) noresample_inds = tf.range(num_samples * batch_size) # Decide whether or not we should resample; don't resample if we are past # the end of a sequence. should_resample = resampling_criterion(num_samples, log_ess, t) should_resample = tf.logical_and(should_resample, cur_mask[:batch_size] > 0.) float_should_resample = tf.to_float(should_resample) ancestor_inds = tf.where( tf.tile(should_resample, [num_samples]), ancestor_inds, noresample_inds) new_state = nested.gather_tensors(new_state, ancestor_inds) # Update the TensorArrays before we reset the weights so that we capture # the incremental weights and not zeros. ta_updates = [log_weights_acc, log_ess, float_should_resample] new_tas = [ta.write(t, x) for ta, x in zip(tas, ta_updates)] # For the particle filters that resampled, update log_p_hat and # reset weights to zero. log_p_hat_update = tf.reduce_logsumexp( log_weights_acc, axis=0) - tf.log(tf.to_float(num_samples)) log_p_hat_acc += log_p_hat_update * float_should_resample log_weights_acc *= (1. - tf.tile(float_should_resample[tf.newaxis, :], [num_samples, 1])) new_accs = (log_weights_acc, log_p_hat_acc, kl_acc) return t + 1, new_state, new_tas, new_accs
def get_mu_tensor(self): const_fact = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var coef = tf.Variable([-1.0, 3.0, 0.0, 1.0], dtype=tf.float32, name="cubic_solver_coef") coef = tf.scatter_update(coef, tf.constant(2), -(3 + const_fact) ) roots = tf.py_func(np.roots, [coef], Tout=tf.complex64, stateful=False) # filter out the correct root root_idx = tf.logical_and(tf.logical_and(tf.greater(tf.real(roots), tf.constant(0.0) ), tf.less(tf.real(roots), tf.constant(1.0) ) ), tf.less(tf.abs(tf.imag(roots) ), 1e-5) ) # in case there are two duplicated roots satisfying the above condition root = tf.reshape(tf.gather(tf.gather(roots, tf.where(root_idx) ), tf.constant(0) ), shape=[] ) tf.assert_equal(tf.size(root), tf.constant(1) ) dr = self._h_max / self._h_min mu = tf.maximum(tf.real(root)**2, ( (tf.sqrt(dr) - 1)/(tf.sqrt(dr) + 1) )**2) return mu
def unwrap(p, discont=np.pi, axis=-1): """Unwrap a cyclical phase tensor. Args: p: Phase tensor. discont: Float, size of the cyclic discontinuity. axis: Axis along which to unwrap. Returns: unwrapped: Unwrapped tensor of same size as input. """ dd = diff(p, axis=axis) ddmod = tf.mod(dd + np.pi, 2.0 * np.pi) - np.pi idx = tf.logical_and(tf.equal(ddmod, -np.pi), tf.greater(dd, 0)) ddmod = tf.where(idx, tf.ones_like(ddmod) * np.pi, ddmod) ph_correct = ddmod - dd idx = tf.less(tf.abs(dd), discont) # Zero the correction where the jump is below the discontinuity threshold, as in np.unwrap. ph_correct = tf.where(idx, tf.zeros_like(ddmod), ph_correct) ph_cumsum = tf.cumsum(ph_correct, axis=axis) shape = p.get_shape().as_list() shape[axis] = 1 ph_cumsum = tf.concat([tf.zeros(shape, dtype=p.dtype), ph_cumsum], axis=axis) unwrapped = p + ph_cumsum return unwrapped
def get_mask(gt, num_classes, ignore_label): less_equal_class = tf.less_equal(gt, num_classes-1) not_equal_ignore = tf.not_equal(gt, ignore_label) mask = tf.logical_and(less_equal_class, not_equal_ignore) indices = tf.squeeze(tf.where(mask), 1) return indices
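# Eager-mode sketch of get_mask: with 3 classes and ignore_label 255, only pixels whose
# label is a valid class keep their (flattened) indices. The toy labels below are made up.
import tensorflow as tf

gt = tf.constant([0, 2, 255, 1, 3], dtype=tf.int32)
print(get_mask(gt, num_classes=3, ignore_label=255).numpy())  # [0 1 3]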
def getReward_touch(objCoordinates, sampled_locs, numObjsPresented, objSize, batch_size): # preallocate for the reward corner = tf.zeros((2,), dtype=tf.float32, name=None) # reward = np.zeros(batch_size) # loop over all examples in the batch # for b in xrange(batch_size): b = 0 objCoords_b = objCoordinates[b,:,:] sampled_locs_b = sampled_locs[b,:,:] numObjsPres_b = numObjsPresented[b] nObjTouched = 0 # for the b-th example in the batch, loop over all objects for j in xrange(maxNumObj): objCoords_cur = objCoords_b[j,:] nTimesObjTouched = 0 # for the j-th object, loop over all glimpses to determine if it is fixated for i in xrange(nGlimpses): sampledCoord_cur = toMnistCoordinates_tf(sampled_locs_b[i,:], img_size) l2Diff_obj = l2distance(objCoords_cur, sampledCoord_cur) l2Diff_corner = l2distance(corner, sampledCoord_cur) isTouchingObj = tf.less_equal(l2Diff_obj, objSize) isNotTouchingCorner = tf.greater_equal(l2Diff_corner, objSize) # true if the current glimpse is fixated on an object tempTouchFlag = tf.cast(tf.logical_and(isTouchingObj, isNotTouchingCorner), tf.int32) nTimesObjTouched = nTimesObjTouched + tempTouchFlag # for the b-th example in the batch, if all objects are touched, then reward = 1, else reward = 0 nObjTouched = nObjTouched + tf.cast(tf.greater_equal(nTimesObjTouched,1), tf.int32) R_bth = tf.equal(nObjTouched, tf.cast(numObjsPres_b, tf.int32)) return R_bth
def _has_foreground_and_background_in_first_frame(label, subsampling_factor): """Checks if the labels have foreground and background in the first frame. Args: label: Label tensor of shape [num_frames, height, width, 1]. subsampling_factor: Integer, the subsampling factor. Returns: Boolean, whether the labels have foreground and background in the first frame. """ h, w = train_utils.resolve_shape(label)[1:3] label_downscaled = tf.squeeze( tf.image.resize_nearest_neighbor(label[0, tf.newaxis], [h // subsampling_factor, w // subsampling_factor], align_corners=True), axis=0) is_bg = tf.equal(label_downscaled, 0) is_fg = tf.logical_not(is_bg) # Just using reduce_any was not robust enough, so lets make sure the count # is above MIN_LABEL_COUNT. fg_count = tf.reduce_sum(tf.cast(is_fg, tf.int32)) bg_count = tf.reduce_sum(tf.cast(is_bg, tf.int32)) has_bg = tf.greater_equal(fg_count, MIN_LABEL_COUNT) has_fg = tf.greater_equal(bg_count, MIN_LABEL_COUNT) return tf.logical_and(has_bg, has_fg)
def train(self, sentences): token_ids, token_values, token_dense_shape = self._tokenize(sentences) tokens_sparse = tf.sparse.SparseTensor( indices=token_ids, values=token_values, dense_shape=token_dense_shape) tokens = tf.sparse.to_dense(tokens_sparse, default_value="") sparse_lookup_ids = tf.sparse.SparseTensor( indices=tokens_sparse.indices, values=self._words_to_indices(tokens_sparse.values), dense_shape=tokens_sparse.dense_shape) lookup_ids = tf.sparse.to_dense(sparse_lookup_ids, default_value=0) # Targets are the next word for each word of the sentence. tokens_ids_seq = lookup_ids[:, 0:-1] tokens_ids_target = lookup_ids[:, 1:] tokens_prefix = tokens[:, 0:-1] # Mask determining which positions we care about for a loss: all positions # that have a valid non-terminal token. mask = tf.logical_and( tf.logical_not(tf.equal(tokens_prefix, "")), tf.logical_not(tf.equal(tokens_prefix, "<E>"))) input_mask = tf.cast(mask, tf.int32) with tf.GradientTape() as t: sentence_embeddings = tf.nn.embedding_lookup(self._embeddings, tokens_ids_seq) lstm_initial_state = self._lstm_cell.get_initial_state( sentence_embeddings) lstm_output = self._rnn_layer( inputs=sentence_embeddings, initial_state=lstm_initial_state) # Stack LSTM outputs into a batch instead of a 2D array. lstm_output = tf.reshape(lstm_output, [-1, self._lstm_cell.output_size]) logits = self._logit_layer(lstm_output) targets = tf.reshape(tokens_ids_target, [-1]) weights = tf.cast(tf.reshape(input_mask, [-1]), tf.float32) losses = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=targets, logits=logits) # Final loss is the mean loss for all token losses. final_loss = tf.math.divide( tf.reduce_sum(tf.multiply(losses, weights)), tf.reduce_sum(weights), name="final_loss") watched = t.watched_variables() gradients = t.gradient(final_loss, watched) for w, g in zip(watched, gradients): w.assign_sub(g) return final_loss
def loop(step_, beams_, beam_value_, golden_value_, golden_inside_, step_valid_, g_id_, golden_record, beam_record): cur_feat_x_ = tf.gather(x, step_) cur_golden_path_ = tf.gather(golden_path, tf.range(step_)) cur_golden_feat_ = self._add_tag_dynamic(cur_feat_x_, cur_golden_path_) # cur_golden_output_ = self._build_cnn(cur_golden_feat_) cur_golden_output_ = build(cur_golden_feat_) cur_golden_node_ = tf.gather(golden_path, tf.reshape(step_, [1])) golden_value_ = tf.add(golden_value_, tf.slice(cur_golden_output_, tf.concat(0, [[0], cur_golden_node_]), [1, 1])) cur_beam_ = tf.unpack(beams_, num=self.beam_size) cur_beam_feat_ = tf.concat(0, [self._add_tag_dynamic(cur_feat_x_, tf.reshape(e, [-1])) for e in cur_beam_]) # cur_beam_output_ = self._build_cnn(cur_beam_feat_) cur_beam_output_ = build(cur_beam_feat_) golden_record = golden_record.write(step_, cur_golden_output_) beam_record = beam_record.write(step_, cur_beam_output_) beam_value_, beams_ = self._top_beams_new(cur_beam_output_, beam_value_, beams_) new_golden_path_ = tf.gather(golden_path, tf.range(step_ + 1)) # golden_beam_id_ = index_of_tensor(new_golden_path_, beams_) g_id_ = index_of_tensor(new_golden_path_, beams_) golden_inside_ = tf.select(tf.less(tf.shape(g_id_)[0], 1), tf.constant(False, tf.bool), tf.constant(True, tf.bool)) step_valid_ = tf.logical_and(tf.less(step_+1, length), tf.less(step_+1, self.max_step_tracked)) return [step_ + 1, beams_, beam_value_, golden_value_, golden_inside_, step_valid_, g_id_, golden_record, beam_record]
def set_logp_to_neg_inf(X, logp, bounds): """Set `logp` to negative infinity when `X` is outside the allowed bounds. # Arguments X: tensorflow.Tensor The variable to apply the bounds to logp: tensorflow.Tensor The log probability corresponding to `X` bounds: list of `Region` objects The regions corresponding to allowed regions of `X` # Returns logp: tensorflow.Tensor The newly bounded log probability """ conditions = [] for l, u in bounds: lower_is_neg_inf = not isinstance(l, tf.Tensor) and np.isneginf(l) upper_is_pos_inf = not isinstance(u, tf.Tensor) and np.isposinf(u) if not lower_is_neg_inf and upper_is_pos_inf: conditions.append(tf.greater(X, l)) elif lower_is_neg_inf and not upper_is_pos_inf: conditions.append(tf.less(X, u)) elif not (lower_is_neg_inf or upper_is_pos_inf): conditions.append(tf.logical_and(tf.greater(X, l), tf.less(X, u))) if len(conditions) > 0: is_inside_bounds = conditions[0] for condition in conditions[1:]: is_inside_bounds = tf.logical_or(is_inside_bounds, condition) logp = tf.select(is_inside_bounds, logp, tf.fill(tf.shape(X), config.dtype(-np.inf))) return logp
def check_integrity_and_batch(*datasets): """Checks whether a sequence of frames are from the same video. Args: *datasets: datasets each skipping 1 frame from the previous one. Returns: batched data and the integrity flag. """ not_broken = tf.constant(True) if "frame_number" in datasets[0]: frame_numbers = [dataset["frame_number"][0] for dataset in datasets] not_broken = tf.equal( frame_numbers[-1] - frame_numbers[0], num_frames-1) if self.only_keep_videos_from_0th_frame: not_broken = tf.logical_and(not_broken, tf.equal(frame_numbers[0], 0)) else: tf.logging.warning("use_not_breaking_batching is True but " "no frame_number is in the dataset.") features = {} for key in datasets[0].keys(): values = [dataset[key] for dataset in datasets] batch = tf.stack(values) features[key] = batch return features, not_broken
def _has_foreground_and_background_in_first_frame_2(label, decoder_output_stride): """Checks if the labels have foreground and background in the first frame. Second attempt, this time we use the actual output dimension for resizing. Args: label: Label tensor of shape [num_frames, height, width, 1]. decoder_output_stride: Integer, the stride of the decoder output. Returns: Boolean, whether the labels have foreground and background in the first frame. """ h, w = train_utils.resolve_shape(label)[1:3] h_sub = model.scale_dimension(h, 1.0 / decoder_output_stride) w_sub = model.scale_dimension(w, 1.0 / decoder_output_stride) label_downscaled = tf.squeeze( tf.image.resize_nearest_neighbor(label[0, tf.newaxis], [h_sub, w_sub], align_corners=True), axis=0) is_bg = tf.equal(label_downscaled, 0) is_fg = tf.logical_not(is_bg) # Just using reduce_any was not robust enough, so lets make sure the count # is above MIN_LABEL_COUNT. fg_count = tf.reduce_sum(tf.cast(is_fg, tf.int32)) bg_count = tf.reduce_sum(tf.cast(is_bg, tf.int32)) has_bg = tf.greater_equal(fg_count, MIN_LABEL_COUNT) has_fg = tf.greater_equal(bg_count, MIN_LABEL_COUNT) return tf.logical_and(has_bg, has_fg)
def get_predictions_and_loss(self, inputs): tokens, context_word_emb, lm_emb, char_index, text_len, is_training, gold_labels = inputs self.dropout = self.get_dropout(self.config["dropout_rate"], is_training) self.lexical_dropout = self.get_dropout( self.config["lexical_dropout_rate"], is_training) self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training) num_sentences = tf.shape(tokens)[0] max_sentence_length = tf.shape(tokens)[1] context_emb_list = [] context_emb_list.append(context_word_emb) char_emb = tf.gather( tf.get_variable( "char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index ) # [num_sentences, max_sentence_length, max_word_length, emb] flattened_char_emb = tf.reshape(char_emb, [ num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3) ]) # [num_sentences * max_sentence_length, max_word_length, emb] flattened_aggregated_char_emb = util.cnn( flattened_char_emb, self.config["filter_widths"], self.config["filter_size"] ) # [num_sentences * max_sentence_length, emb] aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [ num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1) ]) # [num_sentences, max_sentence_length, emb] context_emb_list.append(aggregated_char_emb) lm_emb_size = util.shape(lm_emb, 2) lm_num_layers = util.shape(lm_emb, 3) with tf.variable_scope("lm_aggregation"): self.lm_weights = tf.nn.softmax( tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0))) self.lm_scaling = tf.get_variable( "lm_scaling", [], initializer=tf.constant_initializer(1.0)) flattened_lm_emb = tf.reshape( lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers]) flattened_aggregated_lm_emb = tf.matmul( flattened_lm_emb, tf.expand_dims( self.lm_weights, 1)) # [num_sentences * max_sentence_length * emb, 1] aggregated_lm_emb = tf.reshape( flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size]) aggregated_lm_emb *= self.lm_scaling context_emb_list.append(aggregated_lm_emb) context_emb = tf.concat(context_emb_list, 2) # [num_sentences, max_sentence_length, emb] context_emb = tf.nn.dropout( context_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb] text_len_mask = tf.sequence_mask( text_len, maxlen=max_sentence_length) # [num_sentence, max_sentence_length] candidate_scores_mask = tf.logical_and( tf.expand_dims(text_len_mask, [1]), tf.expand_dims( text_len_mask, [2])) #[num_sentence, max_sentence_length,max_sentence_length] sentence_ends_leq_starts = tf.tile( tf.expand_dims( tf.logical_not( tf.sequence_mask(tf.range(max_sentence_length), max_sentence_length)), 0), [num_sentences, 1, 1 ]) #[num_sentence, max_sentence_length,max_sentence_length] candidate_scores_mask = tf.logical_and(candidate_scores_mask, sentence_ends_leq_starts) flattened_candidate_scores_mask = tf.reshape( candidate_scores_mask, [-1]) #[num_sentence * max_sentence_length * max_sentence_length] context_outputs = self.lstm_contextualize( context_emb, text_len, self.lstm_dropout) # [num_sentence, max_sentence_length, emb] with tf.variable_scope("candidate_starts_ffnn"): candidate_starts_emb = util.projection( context_outputs, self.config["ffnn_size"] ) #[num_sentences, max_sentences_length,emb] with tf.variable_scope("candidate_ends_ffnn"): candidate_ends_emb = util.projection( context_outputs, self.config["ffnn_size"] ) #[num_sentences, max_sentences_length, emb] candidate_ner_scores = util.bilinear_classifier( 
candidate_starts_emb, candidate_ends_emb, self.dropout, output_size=self.num_types + 1 ) #[num_sentence, max_sentence_length,max_sentence_length,types+1] candidate_ner_scores = tf.boolean_mask( tf.reshape(candidate_ner_scores, [-1, self.num_types + 1]), flattened_candidate_scores_mask) loss = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=gold_labels, logits=candidate_ner_scores) loss = tf.reduce_sum(loss) return candidate_ner_scores, loss
def _loop_cond(unused_boxes, unused_threshold, output_size, idx): return tf.logical_and( tf.reduce_min(output_size) < max_output_size, idx < num_boxes // _NMS_TILE_SIZE)
def _filter_max_length(example, max_length=256): """Indicates whether the example's length is lower than the maximum length.""" return tf.logical_and( tf.size(example[0]) <= max_length, tf.size(example[1]) <= max_length)
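# Sketch of how a length predicate like this is typically applied with tf.data's filter;
# the toy generator below is made up, and the two components are packed back into a tuple
# before calling _filter_max_length. Assumes TF 2.x eager with output_signature support.
import tensorflow as tf

def _toy_pairs():
    yield [1, 2, 3], [4, 5]
    yield list(range(300)), list(range(280))

dataset = tf.data.Dataset.from_generator(
    _toy_pairs,
    output_signature=(tf.TensorSpec([None], tf.int32), tf.TensorSpec([None], tf.int32)))
short = dataset.filter(lambda src, tgt: _filter_max_length((src, tgt), max_length=256))
for src, tgt in short:
    print(src.numpy(), tgt.numpy())  # only the first (short) pair survives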
def construct(self, args, source_chars, target_chars, bow, eow): with self.session.graph.as_default(): if args.recodex: tf.get_variable_scope().set_initializer(tf.glorot_uniform_initializer(seed=42)) # Inputs self.sentence_lens = tf.placeholder(tf.int32, [None], name="sentence_lens") self.source_ids = tf.placeholder(tf.int32, [None, None], name="source_ids") self.source_seqs = tf.placeholder(tf.int32, [None, None], name="source_seqs") self.source_seq_lens = tf.placeholder(tf.int32, [None], name="source_seq_lens") self.target_ids = tf.placeholder(tf.int32, [None, None], name="target_ids") self.target_seqs = tf.placeholder(tf.int32, [None, None], name="target_seqs") self.target_seq_lens = tf.placeholder(tf.int32, [None], name="target_seq_lens") # Append EOW after target_seqs target_seqs = tf.reverse_sequence(self.target_seqs, self.target_seq_lens, 1) target_seqs = tf.pad(target_seqs, [[0, 0], [1, 0]], constant_values=eow) target_seq_lens = self.target_seq_lens + 1 target_seqs = tf.reverse_sequence(target_seqs, target_seq_lens, 1) # Encoder # TODO: Generate source embeddings for source chars, of shape [source_chars, args.char_dim]. # TODO: Embed the self.source_seqs using the source embeddings. # TODO: Using a GRU with dimension args.rnn_dim, process the embedded self.source_seqs # using forward RNN and store the resulting states into `source_states`. # Index the unique words using self.source_ids and self.target_ids. sentence_mask = tf.sequence_mask(self.sentence_lens) source_states = tf.boolean_mask(tf.nn.embedding_lookup(source_states, self.source_ids), sentence_mask) source_lens = tf.boolean_mask(tf.nn.embedding_lookup(self.source_seq_lens, self.source_ids), sentence_mask) target_seqs = tf.boolean_mask(tf.nn.embedding_lookup(target_seqs, self.target_ids), sentence_mask) target_lens = tf.boolean_mask(tf.nn.embedding_lookup(target_seq_lens, self.target_ids), sentence_mask) # Decoder # TODO: Generate target embeddings for target chars, of shape [target_chars, args.char_dim]. # TODO: Embed the target_seqs using the target embeddings. # TODO: Generate a decoder GRU with wimension args.rnn_dim. # TODO: Create a `decoder_layer` -- a fully connected layer with # target_chars neurons used in the decoder to classify into target characters. # The DecoderTraining will be used during training. It will output logits for each # target character. class DecoderTraining(tf.contrib.seq2seq.Decoder): @property def batch_size(self): return # TODO: Return size of the batch, using for example source_states size @property def output_dtype(self): return tf.float32 # Type for logits of target characters @property def output_size(self): return target_chars # Length of logits for every output def initialize(self, name=None): finished = # TODO: False if target_lens > 0, True otherwise states = # TODO: Initial decoder state to use inputs = # TODO: embedded BOW characters of shape [self.batch_size] using target embeddings. # You can use tf.fill to generate BOWs of appropriate size. return finished, inputs, states def step(self, time, inputs, states, name=None): outputs, states = # TODO: Run the decoder GRU cell using inputs and states. outputs = # TODO: Apply the decoder_layer on outputs. next_input = # TODO: Next input are character embeddings with index `time` in target_embedded. finished = # TODO: False if target_lens > time + 1, True otherwise. 
return outputs, states, next_input, finished output_layer, _, _ = tf.contrib.seq2seq.dynamic_decode(DecoderTraining()) self.predictions_training = tf.argmax(output_layer, axis=2, output_type=tf.int32) # The DecoderPrediction will be used during prediction. It will # directly output the predicted target characters. class DecoderPrediction(tf.contrib.seq2seq.Decoder): @property def batch_size(self): return # TODO: Return size of the batch, using for example source_states size @property def output_dtype(self): return tf.int32 # Type for predicted target characters @property def output_size(self): return 1 # Will return just one output def initialize(self, name=None): finished = # TODO: False of shape [self.batch_size]. states = # TODO: Initial decoder state to use. inputs = # TODO: embedded BOW characters of shape [self.batch_size] using target embeddings. # You can use tf.fill to generate BOWs of appropriate size. return finished, inputs, states def step(self, time, inputs, states, name=None): outputs, states = # TODO: Run the decoder GRU cell using inputs and states. outputs = # TODO: Apply the decoder_layer on outputs. outputs = # TODO: Use tf.argmax to choose most probable class (supply parameter `output_type=tf.int32`). next_input = # TODO: Embed `outputs` using target_embeddings finished = # TODO: True where outputs==eow, False otherwise # Use tf.equal for the comparison, Python's '==' is not overloaded return outputs, states, next_input, finished self.predictions, _, self.prediction_lens = tf.contrib.seq2seq.dynamic_decode( DecoderPrediction(), maximum_iterations=tf.reduce_max(source_lens) + 10) # Training weights = tf.sequence_mask(target_lens, dtype=tf.float32) loss = tf.losses.sparse_softmax_cross_entropy(target_seqs, output_layer, weights=weights) global_step = tf.train.create_global_step() self.training = tf.train.AdamOptimizer().minimize(loss, global_step=global_step, name="training") # Summaries accuracy_training = tf.reduce_all(tf.logical_or( tf.equal(self.predictions_training, target_seqs), tf.logical_not(tf.sequence_mask(target_lens))), axis=1) self.current_accuracy_training, self.update_accuracy_training = tf.metrics.mean(accuracy_training) minimum_length = tf.minimum(tf.shape(self.predictions)[1], tf.shape(target_seqs)[1]) accuracy = tf.logical_and( tf.equal(self.prediction_lens, target_lens), tf.reduce_all(tf.logical_or( tf.equal(self.predictions[:, :minimum_length], target_seqs[:, :minimum_length]), tf.logical_not(tf.sequence_mask(target_lens, maxlen=minimum_length))), axis=1)) self.current_accuracy, self.update_accuracy = tf.metrics.mean(accuracy) self.current_loss, self.update_loss = tf.metrics.mean(loss, weights=tf.reduce_sum(weights)) self.reset_metrics = tf.variables_initializer(tf.get_collection(tf.GraphKeys.METRIC_VARIABLES)) summary_writer = tf.contrib.summary.create_file_writer(args.logdir, flush_millis=10 * 1000) self.summaries = {} with summary_writer.as_default(), tf.contrib.summary.record_summaries_every_n_global_steps(10): self.summaries["train"] = [tf.contrib.summary.scalar("train/loss", self.update_loss), tf.contrib.summary.scalar("train/accuracy", self.update_accuracy_training)] with summary_writer.as_default(), tf.contrib.summary.always_record_summaries(): for dataset in ["dev", "test"]: self.summaries[dataset] = [tf.contrib.summary.scalar(dataset + "/loss", self.current_loss), tf.contrib.summary.scalar(dataset + "/accuracy", self.current_accuracy)] # Initialize variables self.session.run(tf.global_variables_initializer()) with 
summary_writer.as_default(): tf.contrib.summary.initialize(session=self.session, graph=self.session.graph)
def _accuracy(logits, label): labels = tf.logical_and(label, tf.ones_like(logits, dtype=bool)) correct = tf.equal(tf.greater(logits, 0), labels) return tf.reduce_mean(tf.to_float(correct))
def _is_finite(t: TensorType) -> TensorType: return tf.logical_and(tf.math.is_finite(t), tf.logical_not(tf.math.is_nan(t)))
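# Quick eager-mode check (assumes TF 2.x): inf and nan both fail, finite values pass. Note
# that tf.math.is_finite is already False for NaN, so the extra is_nan guard is belt-and-braces.
import tensorflow as tf

x = tf.constant([1.0, float("inf"), float("nan")])
print(_is_finite(x).numpy())  # [ True False False]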
def losses(self, logits, localisations, gclasses, glocalisations, gscores, match_threshold, negative_ratio, loss_alpha, label_smoothing, device='/cpu:0'): with tf.name_scope('ssd_losses'): lshape = logits[0].get_shape().as_list() num_classes = lshape[-1] batch_size = lshape[0] flogits = [] fgclasses = [] fgscores = [] flocalisations = [] fglocalisations = [] for i in range(len(logits)): flogits.append(tf.reshape(logits[i], [-1, num_classes])) fgclasses.append(tf.reshape(gclasses[i], [-1])) fgscores.append(tf.reshape(gscores[i], [-1])) flocalisations.append(tf.reshape(localisations[i], [-1, 4])) fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4])) logits = tf.concat(flogits, axis=0) print logits gclasses = tf.concat(fgclasses, axis=0) print gclasses gscores = tf.concat(fgscores, axis=0) print gscores localisations = tf.concat(flocalisations, axis=0) print localisations glocalisations = tf.concat(fglocalisations, axis=0) print glocalisations dtype = logits.dtype # compute positive matching mask pmask = gscores > match_threshold print pmask fpmask = tf.cast(pmask, dtype) n_positives = tf.reduce_sum(fpmask) # hard negative mining no_classes = tf.cast(pmask, tf.int32) predictions = slim.softmax(logits) print predictions nmask = tf.logical_and(tf.logical_not(pmask), gscores > -0.5) print nmask fnmask = tf.cast(nmask, dtype) nvalues = tf.where(nmask, predictions[:, 0], 1. - fnmask) print nvalues nvalues_flat = tf.reshape(nvalues, [-1]) print nvalues_flat # number of negative entries to select max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32) n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size n_neg = tf.minimum(n_neg, max_neg_entries) print n_neg val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg) print val, idxes max_hard_pred = -val[-1] print max_hard_pred # final negative mask nmask = tf.logical_and(nmask, nvalues < max_hard_pred) fnmask = tf.cast(nmask, dtype) with tf.name_scope('cross_entropy_pos'): loss = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=logits, labels=gclasses) loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value') tf.losses.add_loss(loss) with tf.name_scope('cross_entropy_neg'): loss = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=logits, labels=no_classes) loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value') tf.losses.add_loss(loss) # add localisation loss: smooth l1 with tf.name_scope('localisation'): weights = tf.expand_dims(loss_alpha * fpmask, axis=-1) x = localisations - glocalisations absx = tf.abs(x) minx = tf.minimum(absx, 1) loss = 0.5 * ((absx - 1) * minx + absx) loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value') tf.losses.add_loss(loss)
def optimizers(self, loss_coarse, loss_fine, global_step, batchsize): samples_coarse = 2000000 samples_fine = 1500000 steps_coarse = samples_coarse // batchsize steps_fine = samples_fine // batchsize def create_optimizer(loss, rate, momentum, collections, name='Adam'): optimizer = tf.train.AdamOptimizer(rate, momentum, 1, name=name) vars = [] for c in collections: vars += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, c) grad_vars = optimizer.compute_gradients(loss, var_list=vars) grad, vars = list(zip(*grad_vars)) return optimizer, grad_vars def coarse_optimizers(): opt1, grad_vars1 = create_optimizer(loss_coarse, 0.001, 0.9, ['coarse/conv'], 'CoarseConv') opt2, grad_vars2 = create_optimizer(loss_coarse, 0.1, 0.9, ['coarse/dense'], 'CoarseDense') control_deps = next(zip(*grad_vars1, *grad_vars2)) with tf.control_dependencies(control_deps): return tf.group( opt1.apply_gradients(grad_vars1, global_step), opt2.apply_gradients(grad_vars2) ) def fine_optimizers(): opt1, grad_vars1 = create_optimizer(loss_fine, 0.001, 0.9, ['fine/first', 'fine/third'], 'FineA') opt2, grad_vars2 = create_optimizer(loss_fine, 0.01, 0.9, ['fine/second'], 'FineB') control_deps = next(zip(*grad_vars1, *grad_vars2)) with tf.control_dependencies(control_deps): return tf.group( opt1.apply_gradients(grad_vars1, global_step), opt2.apply_gradients(grad_vars2) ) with tf.name_scope('optimizers'): cond_fine = tf.logical_and(steps_coarse <= global_step, global_step < (steps_coarse + steps_fine)) cond_coarse = global_step < steps_coarse train = tf.case( collections.OrderedDict([(cond_fine, fine_optimizers), (cond_coarse, coarse_optimizers)]), default=lambda: tf.group(tf.assign_add(global_step, 1)), exclusive=True, name='optimizers' ) phase = tf.case(collections.OrderedDict([(cond_fine, lambda: 2), (cond_coarse, lambda: 1)]), default=lambda: 3, exclusive=True, name='DeterminePhase') tf.summary.scalar('Phase', phase) return train
def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb, lm_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids): self.dropout = self.get_dropout(self.config["dropout_rate"], is_training) self.lexical_dropout = self.get_dropout(self.config["lexical_dropout_rate"], is_training) self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training) num_sentences = tf.shape(context_word_emb)[0] max_sentence_length = tf.shape(context_word_emb)[1] context_emb_list = [context_word_emb] head_emb_list = [head_word_emb] if self.config["char_embedding_size"] > 0: char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb] flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb] flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb] aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb] context_emb_list.append(aggregated_char_emb) head_emb_list.append(aggregated_char_emb) if not self.lm_file: elmo_module = hub.Module("https://tfhub.dev/google/elmo/2") lm_embeddings = elmo_module( inputs={"tokens": tokens, "sequence_len": text_len}, signature="tokens", as_dict=True) word_emb = lm_embeddings["word_emb"] # [num_sentences, max_sentence_length, 512] lm_emb = tf.stack([tf.concat([word_emb, word_emb], -1), lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"]], -1) # [num_sentences, max_sentence_length, 1024, 3] lm_emb_size = util.shape(lm_emb, 2) lm_num_layers = util.shape(lm_emb, 3) with tf.variable_scope("lm_aggregation"): self.lm_weights = tf.nn.softmax(tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0))) self.lm_scaling = tf.get_variable("lm_scaling", [], initializer=tf.constant_initializer(1.0)) flattened_lm_emb = tf.reshape(lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers]) flattened_aggregated_lm_emb = tf.matmul(flattened_lm_emb, tf.expand_dims(self.lm_weights, 1)) # [num_sentences * max_sentence_length * emb, 1] aggregated_lm_emb = tf.reshape(flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size]) aggregated_lm_emb *= self.lm_scaling context_emb_list.append(aggregated_lm_emb) context_emb = tf.concat(context_emb_list, 2) # [num_sentences, max_sentence_length, emb] head_emb = tf.concat(head_emb_list, 2) # [num_sentences, max_sentence_length, emb] context_emb = tf.nn.dropout(context_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb] head_emb = tf.nn.dropout(head_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb] text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length) # [num_sentence, max_sentence_length] context_outputs = self.lstm_contextualize(context_emb, text_len, text_len_mask) # [num_words, emb] num_words = util.shape(context_outputs, 0) genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb] sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, 
max_sentence_length] flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words] flattened_head_emb = self.flatten_emb_by_sentence(head_emb, text_len_mask) # [num_words] candidate_starts = tf.tile(tf.expand_dims(tf.range(num_words), 1), [1, self.max_span_width]) # [num_words, max_span_width] candidate_ends = candidate_starts + tf.expand_dims(tf.range(self.max_span_width), 0) # [num_words, max_span_width] candidate_start_sentence_indices = tf.gather(flattened_sentence_indices, candidate_starts) # [num_words, max_span_width] candidate_end_sentence_indices = tf.gather(flattened_sentence_indices, tf.minimum(candidate_ends, num_words - 1)) # [num_words, max_span_width] candidate_mask = tf.logical_and(candidate_ends < num_words, tf.equal(candidate_start_sentence_indices, candidate_end_sentence_indices)) # [num_words, max_span_width] flattened_candidate_mask = tf.reshape(candidate_mask, [-1]) # [num_words * max_span_width] candidate_starts = tf.boolean_mask(tf.reshape(candidate_starts, [-1]), flattened_candidate_mask) # [num_candidates] candidate_ends = tf.boolean_mask(tf.reshape(candidate_ends, [-1]), flattened_candidate_mask) # [num_candidates] candidate_sentence_indices = tf.boolean_mask(tf.reshape(candidate_start_sentence_indices, [-1]), flattened_candidate_mask) # [num_candidates] candidate_cluster_ids = self.get_candidate_labels(candidate_starts, candidate_ends, gold_starts, gold_ends, cluster_ids) # [num_candidates] candidate_span_emb = self.get_span_emb(flattened_head_emb, context_outputs, candidate_starts, candidate_ends) # [num_candidates, emb] candidate_mention_scores = self.get_mention_scores(candidate_span_emb) # [k, 1] candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [k] k = tf.to_int32(tf.floor(tf.to_float(tf.shape(context_outputs)[0]) * self.config["top_span_ratio"])) top_span_indices = coref_ops.extract_spans(tf.expand_dims(candidate_mention_scores, 0), tf.expand_dims(candidate_starts, 0), tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0), util.shape(context_outputs, 0), True) # [1, k] top_span_indices.set_shape([1, None]) top_span_indices = tf.squeeze(top_span_indices, 0) # [k] top_span_starts = tf.gather(candidate_starts, top_span_indices) # [k] top_span_ends = tf.gather(candidate_ends, top_span_indices) # [k] top_span_emb = tf.gather(candidate_span_emb, top_span_indices) # [k, emb] top_span_cluster_ids = tf.gather(candidate_cluster_ids, top_span_indices) # [k] top_span_mention_scores = tf.gather(candidate_mention_scores, top_span_indices) # [k] top_span_sentence_indices = tf.gather(candidate_sentence_indices, top_span_indices) # [k] top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts) # [k] c = tf.minimum(self.config["max_top_antecedents"], k) if self.config["coarse_to_fine"]: top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(top_span_emb, top_span_mention_scores, c) else: top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(top_span_emb, top_span_mention_scores, c) dummy_scores = tf.zeros([k, 1]) # [k, 1] for i in range(self.config["coref_depth"]): with tf.variable_scope("coref_layer", reuse=(i > 0)): top_antecedent_emb = tf.gather(top_span_emb, top_antecedents) # [k, c, emb] top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb) # 
[k, c] top_antecedent_weights = tf.nn.softmax(tf.concat([dummy_scores, top_antecedent_scores], 1)) # [k, c + 1] top_antecedent_emb = tf.concat([tf.expand_dims(top_span_emb, 1), top_antecedent_emb], 1) # [k, c + 1, emb] attended_span_emb = tf.reduce_sum(tf.expand_dims(top_antecedent_weights, 2) * top_antecedent_emb, 1) # [k, emb] with tf.variable_scope("f"): f = tf.sigmoid(util.projection(tf.concat([top_span_emb, attended_span_emb], 1), util.shape(top_span_emb, -1))) # [k, emb] top_span_emb = f * attended_span_emb + (1 - f) * top_span_emb # [k, emb] top_antecedent_scores = tf.concat([dummy_scores, top_antecedent_scores], 1) # [k, c + 1] top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids, top_antecedents) # [k, c] top_antecedent_cluster_ids += tf.to_int32(tf.log(tf.to_float(top_antecedents_mask))) # [k, c] same_cluster_indicator = tf.equal(top_antecedent_cluster_ids, tf.expand_dims(top_span_cluster_ids, 1)) # [k, c] non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0, 1) # [k, 1] pairwise_labels = tf.logical_and(same_cluster_indicator, non_dummy_indicator) # [k, c] dummy_labels = tf.logical_not(tf.reduce_any(pairwise_labels, 1, keepdims=True)) # [k, 1] top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels], 1) # [k, c + 1] loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels) # [k] loss = tf.reduce_sum(loss) # [] return [candidate_starts, candidate_ends, candidate_mention_scores, top_span_starts, top_span_ends, top_antecedents, top_antecedent_scores], loss
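# Illustrative NumPy sketch (toy sizes, hypothetical sentence ids) of the span-candidate
# enumeration above: each word index is paired with max_span_width end offsets, then spans
# that run past the last word or cross a sentence boundary are masked out before scoring.
import numpy as np

num_words, max_span_width = 6, 3
sentence_ids = np.array([0, 0, 0, 1, 1, 1])  # stand-in for flattened_sentence_indices

starts = np.tile(np.arange(num_words)[:, None], [1, max_span_width])  # [num_words, max_span_width]
ends = starts + np.arange(max_span_width)[None, :]                    # [num_words, max_span_width]
same_sentence = sentence_ids[starts] == sentence_ids[np.minimum(ends, num_words - 1)]
mask = (ends < num_words) & same_sentence

candidate_starts = starts[mask]  # e.g. the span (2, 3) is dropped: it crosses a sentence boundary
candidate_ends = ends[mask]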
def rayTraceSinglePass( rays, boundarySegments, boundaryArcs, targetSegments, targetArcs, materials, epsilion=1e-6, ): if boundarySegments is not None: b_usingSegments = True else: b_usingSegments = False if boundaryArcs is not None: b_usingArcs = True else: b_usingArcs = False with tf.name_scope("rayTraceSingle") as scope: # rayRange is just a list of all ray indexes, useful for constructing index tensors to be used # with gather with tf.name_scope("rayRange") as scope: rayRange = tf.range( tf.shape(rays, out_type=tf.int64)[0], dtype=tf.int64, name="rayRange" ) # join boundaries and targets, for the purposes of finding the closest intersection with tf.name_scope("segmentTargetJoining") as scope: if b_usingSegments: opticalSegmentCount = tf.cast( tf.shape(boundarySegments)[0], dtype=tf.int64 ) else: opticalSegmentCount = 0 if targetSegments is not None: targetSegments = tf.pad(targetSegments, [[0, 0], [0, 2]]) if b_usingSegments: boundarySegments = tf.concat( (boundarySegments, targetSegments), 0, name="joinedBoundarySegments", ) elif targetSegments.shape[0] != 0: boundarySegments = targetSegments b_usingSegments = True with tf.name_scope("arcTargetJoining") as scope: if b_usingArcs: opticalArcCount = tf.cast(tf.shape(boundaryArcs)[0], dtype=tf.int64) else: opticalArcCount = 0 if targetArcs is not None: targetArcs = tf.pad(targetArcs, [[0, 0], [0, 2]]) if b_usingArcs: boundaryArcs = tf.concat( (boundaryArcs, targetArcs), 0, name="joinedBoundaryArcs" ) elif targetArcs.shape[0] != 0: boundaryArcs = targetArcs b_usingArcs = True # slice the input rays into sections with tf.name_scope("inputRaySlicing") as scope: xstart = rays[:, 0] ystart = rays[:, 1] xend = rays[:, 2] yend = rays[:, 3] # intersect rays and boundary segments if b_usingSegments: with tf.name_scope("ray-SegmentIntersection") as scope: with tf.name_scope("variableMeshing") as scope: xa1, xb1 = tf.meshgrid(xstart, boundarySegments[:, 0]) ya1, yb1 = tf.meshgrid(ystart, boundarySegments[:, 1]) xa2, xb2 = tf.meshgrid(xend, boundarySegments[:, 2]) ya2, yb2 = tf.meshgrid(yend, boundarySegments[:, 3]) xa = xa2 - xa1 ya = ya2 - ya1 xb = xb2 - xb1 yb = yb2 - yb1 # v is the parameter of the intersection for B (bounds), and u is for A (rays). inf values signify # that this pair of lines is parallel with tf.name_scope("raw_v_parameter") as scope: denominator = xa * yb - ya * xb validSegmentIntersection = tf.greater_equal( tf.abs(denominator), epsilion ) safe_value = tf.ones_like(denominator) safe_denominator = tf.where( validSegmentIntersection, denominator, safe_value ) segmentV = tf.where( validSegmentIntersection, (ya * (xb1 - xa1) - xa * (yb1 - ya1)) / safe_denominator, safe_value, ) with tf.name_scope("raw_u_parameter") as scope: segmentU = tf.where( validSegmentIntersection, (xb * (ya1 - yb1) - yb * (xa1 - xb1)) / safe_denominator, safe_value, ) # Since B encodes line segments, not infinite lines, purge all occurances in v which are <=0 or >=1 # since these imply rays that did not actually strike the segment, only intersected with its # infinite continuation. # And since A encodes semi-infinite rays, purge all occurances in u which are <epsilion, since # these are intersections that occur before the ray source. 
We need to compare to epsilion to take # account of rays that are starting on a boundary with tf.name_scope("selectClosestValidIntersection") as scope: validSegmentIntersection = tf.logical_and( validSegmentIntersection, tf.greater_equal(segmentV, -epsilion) ) validSegmentIntersection = tf.logical_and( validSegmentIntersection, tf.less_equal(segmentV, 1.0 + epsilion), ) validSegmentIntersection = tf.logical_and( validSegmentIntersection, tf.greater_equal(segmentU, epsilion) ) # true where a ray intersection was actually found (since raySegmentIndices = 0 if the ray # intersects with boundary 0, or if there was no intersection with tf.name_scope("raySegmentMask") as scope: raySegmentMask = tf.reduce_any(validSegmentIntersection, axis=0) # match segmentU to each ray with tf.name_scope("segmentU") as scope: # raySegmentIndices tells us which ray intersects with which boundary. # raySegmentIndices[n]=m => ray n intersects boundary segment m inf = 2 * tf.reduce_max(segmentU) * safe_value segmentU = tf.where(validSegmentIntersection, segmentU, inf) raySegmentIndices = tf.argmin( segmentU, axis=0, name="raySegmentIndices" ) # intersectIndicesSquare is a set of indices that can be used with gather_nd to select # positions out of the grid tensors intersectIndicesSquare = tf.transpose( tf.stack([raySegmentIndices, rayRange]) ) # the u parameter for ray intersections, after filtering and processing segmentU = tf.gather_nd( segmentU, intersectIndicesSquare, name="segmentU" ) # package and pair the boundary segments with the rays that intersect with them boundarySegments = tf.gather( boundarySegments, raySegmentIndices, name="boundarySegments" ) # intersect rays and boundary arcs if b_usingArcs: with tf.name_scope("ray-ArcIntersection") as scope: with tf.name_scope("inputMeshgrids") as scope: x1, xc = tf.meshgrid(xstart, boundaryArcs[:, 0]) y1, yc = tf.meshgrid(ystart, boundaryArcs[:, 1]) x2, thetaStart = tf.meshgrid(xend, boundaryArcs[:, 2]) y2, thetaEnd = tf.meshgrid(yend, boundaryArcs[:, 3]) y2, r = tf.meshgrid(tf.reshape(yend, [-1]), boundaryArcs[:, 4]) # the reshape in the above line shouldn't be necessary, but I was getting some really wierd # bugs that went away whenever I tried to read the damn tensor, and this fixes it for some # reason. # a, b, c here are parameters to a quadratic equation for u, so we have some special cases to deal # with # a = 0 => ray of length zero. This should never happen, but if it does, should invalidate # the intersections # rad < 0 => ray does not intersect circle # ????? # c = 0 => ray starts on circle => u = 0, -b/c # c = 0 => ray ends on circle??? 
My mind has changed on this with tf.name_scope("coordinateAdjusting") as scope: xr = (x1 - xc) / r yr = (y1 - yc) / r xd = (x2 - x1) / r yd = (y2 - y1) / r with tf.name_scope("quadraticEquationParts") as scope: with tf.name_scope("a") as scope: a = xd * xd + yd * yd with tf.name_scope("b") as scope: b = 2.0 * xr * xd + 2.0 * yr * yd with tf.name_scope("c") as scope: c = xr * xr + yr * yr - 1.0 with tf.name_scope("rad") as scope: rad = b * b - 4.0 * a * c safe_value = tf.ones_like(a, name="safe_value") with tf.name_scope("raw_u_parameter") as scope: # u will be the parameter of the intersections along the ray # rad < 0 special case with tf.name_scope("specialCase_complex") as scope: radLess = tf.less(rad, 0) uminus_valid = uplus_valid = tf.logical_not(radLess) safe_rad = tf.where(radLess, safe_value, rad) uminus = tf.where(radLess, safe_value, (-b - tf.sqrt(safe_rad))) uplus = tf.where(radLess, safe_value, (-b + tf.sqrt(safe_rad))) # a = 0 special case with tf.name_scope("specialCase_azero") as scope: azero = tf.less(tf.abs(a), epsilion) safe_a = tf.where(azero, safe_value, 2 * a) uminus_valid = tf.logical_and( uminus_valid, tf.logical_not(azero) ) uminus = tf.where(azero, safe_value, uminus / safe_a) uplus_valid = tf.logical_and(uplus_valid, tf.logical_not(azero)) uplus = tf.where(azero, safe_value, uplus / safe_a) """ czero = tf.less(tf.abs(c), epsilion) safe_c = tf.where(czero, safe_value, c) uplus_valid = tf.logical_and(uplus_valid, tf.logical_not(czero)) b_over_c = tf.where(czero, safe_value, b/safe_c) uplus = tf.where(azero, -b_over_c, uplus/safe_a) #uplus = tf.where(azero, -b/c, uplus/safe_a)""" # cut out all of the rays that have a u < epsilion parameter, since we only want reactions # ahead of the ray with tf.name_scope("cullNegativeU") as scope: uminus_valid = tf.logical_and( uminus_valid, tf.greater_equal(uminus, epsilion) ) uplus_valid = tf.logical_and( uplus_valid, tf.greater_equal(uplus, epsilion) ) with tf.name_scope("raw_v_parameter") as scope: # determine the x,y coordinate of the intersections with tf.name_scope("xminus") as scope: xminus = x1 + (x2 - x1) * uminus with tf.name_scope("xplus") as scope: xplus = x1 + (x2 - x1) * uplus with tf.name_scope("yminus") as scope: yminus = y1 + (y2 - y1) * uminus with tf.name_scope("yplus") as scope: yplus = y1 + (y2 - y1) * uplus # determine the angle along the arc (arc's parameter) where the intersection occurs """ these atan2 calls seem to be f*****g up the gradient. So I have to do something convoluted.""" """ finiteUMinus = tf.debugging.is_finite(uminus) finiteUPlus = tf.debugging.is_finite(uplus) def safe_atan2(y, x, safe_mask): with tf.name_scope("safe_atan") as scope: safe_x = tf.where(safe_mask, x, tf.ones_like(x)) safe_y = tf.where(safe_mask, y, tf.ones_like(y)) return tf.where(safe_mask, tf.atan2(safe_y, safe_x), tf.zeros_like(safe_x))""" vminus = tf.atan2(yminus - yc, xminus - xc) # vminus = safe_atan2(yminus-yc, xminus-xc, finiteUMinus) vminus = tf.floormod(vminus, 2 * PI) vplus = tf.atan2(yplus - yc, xplus - xc) # vplus = safe_atan2(yplus-yc, xplus-xc, finiteUPlus) vplus = tf.floormod(vplus, 2 * PI) # Cut out all cases where v does not fall within the angular extent of the arc with tf.name_scope("selectValid_v") as scope: # my angle in interval algorithm fails when the interval is full (0->2PI). 
So making the # following adjustment to thetaStart thetaStart = thetaStart + epsilion vminus_valid = tf.less_equal( tf.floormod(vminus - thetaStart, 2 * PI), tf.floormod(thetaEnd - thetaStart, 2 * PI), ) uminus_valid = tf.logical_and(vminus_valid, uminus_valid) vplus_valid = tf.less_equal( tf.floormod(vplus - thetaStart, 2 * PI), tf.floormod(thetaEnd - thetaStart, 2 * PI), ) uplus_valid = tf.logical_and(vplus_valid, uplus_valid) # now we can finally select between the plus and minus cases # arcU = tf.where(tf.less(uminus, uplus), uminus, uplus, name="arcU") # arcV = tf.where(tf.less(uminus, uplus), vminus, vplus, name="arcV") with tf.name_scope("choosePlusOrMinus") as scope: # We have been keeping track of valid and invalid intersections in the u+/-_valid tensors. But # now we need to compare the values in the u+/- tensors and prepare for the argmin call that # finds only the closest intersections. To do this we now need to fill the invalid values in # each tensor with some value that is larger than any valid value. Unfortunately we cannot # use np.inf because that seems to mess with the gradient calculator. inf = ( 2 * safe_value * tf.reduce_max([tf.reduce_max(uminus), tf.reduce_max(uplus)]) ) uminus = tf.where(uminus_valid, uminus, inf) uplus = tf.where(uplus_valid, uplus, inf) choose_uminus = tf.less(uminus, uplus) uminus_valid = tf.logical_and(uminus_valid, choose_uminus) uplus_valid = tf.logical_and( uplus_valid, tf.logical_not(choose_uminus) ) # rayArcMask will tell us which rays have found at least one valid arc intersection rayArcMask = tf.logical_or(uminus_valid, uplus_valid) rayArcMask = tf.reduce_any(rayArcMask, axis=0) arcU = tf.where(choose_uminus, uminus, uplus) arcV = tf.where(choose_uminus, vminus, vplus) """ # true where a ray intersection was actually found with tf.name_scope("rayArcMask") as scope: rayArcMask = tf.is_finite(arcU) rayArcMask = tf.reduce_any(rayArcMask, axis=0)""" # match arcU to each ray with tf.name_scope("arcU_and_arcV") as scope: # rayArcIndices tells us which ray intersects with which boundary. 
# rayArcIndices[n]=m => ray n intersects boundary segment m rayArcIndices = tf.argmin(arcU, axis=0, name="rayArcIndices") # intersectIndicesSquare is a set of indices that can be used with gather_nd to select # positions out of the grid tensors intersectIndicesSquare = tf.transpose( tf.stack([rayArcIndices, rayRange]) ) # the u parameter for ray intersections, after filtering and processing arcU = tf.gather_nd(arcU, intersectIndicesSquare, name="arcU") arcV = tf.gather_nd(arcV, intersectIndicesSquare, name="arcV") # package and pair the boundary arcs with the rays that intersect with them boundaryArcs = tf.gather( boundaryArcs, rayArcIndices, name="boundaryArcs" ) # determine which rays are dead with tf.name_scope("deadRays") as scope: if b_usingSegments and b_usingArcs: deadRays = tf.boolean_mask( rays, tf.logical_not(tf.logical_or(rayArcMask, raySegmentMask)), name="deadRays", ) else: if b_usingSegments: deadRays = tf.boolean_mask( rays, tf.logical_not(raySegmentMask), name="deadRays" ) elif b_usingArcs: deadRays = tf.boolean_mask( rays, tf.logical_not(rayArcMask), name="deadRays" ) else: raise RuntimeError( "rayTraceSinglePass: no boundaries provided for raytracing" ) # select between segment and arc intersections with tf.name_scope("arc_segment_selection") as scope: if b_usingSegments and b_usingArcs: chooseSegment = tf.logical_and( tf.less(segmentU, arcU), raySegmentMask, name="chooseSegment" ) chooseSegment = tf.logical_or( chooseSegment, tf.logical_and(raySegmentMask, tf.logical_not(rayArcMask)), ) chooseArc = tf.logical_and( tf.logical_not(chooseSegment), rayArcMask, name="chooseArc" ) chooseArc = tf.logical_or( chooseArc, tf.logical_and(rayArcMask, tf.logical_not(raySegmentMask)), ) else: if b_usingSegments: chooseSegment = raySegmentMask if b_usingArcs: chooseArc = rayArcMask # project ALL rays into the boundaries. 
Rays that do not intersect with any boundaries will also be # projected to zero length, but these will be filtered off later with tf.name_scope("rayProjection") as scope: if b_usingSegments: with tf.name_scope("segments") as scope: xstart = rays[:, 0] ystart = rays[:, 1] xend = rays[:, 2] yend = rays[:, 3] xend = xstart + (xend - xstart) * segmentU yend = ystart + (yend - ystart) * segmentU reactedRays_Segment = tf.stack( [xstart, ystart, xend, yend, rays[:, 4], rays[:, 5]], axis=1 ) if b_usingArcs: with tf.name_scope("arcs") as scope: xstart = rays[:, 0] ystart = rays[:, 1] xend = rays[:, 2] yend = rays[:, 3] xend = xstart + (xend - xstart) * arcU yend = ystart + (yend - ystart) * arcU reactedRays_Arc = tf.stack( [xstart, ystart, xend, yend, rays[:, 4], rays[:, 5]], axis=1 ) # determine which rays are finished with tf.name_scope("finishedRays") as scope: finishedRays = tf.zeros([0, 6], dtype=tf.float64) if b_usingSegments: finishedSegmentMask = tf.greater_equal( raySegmentIndices, opticalSegmentCount, name="finishedSegmentMask" ) fsMask = tf.logical_and(finishedSegmentMask, chooseSegment) finishedRays_Segment = tf.boolean_mask(reactedRays_Segment, fsMask) finishedRays = tf.cond( tf.reduce_any(fsMask), lambda: tf.concat([finishedRays, finishedRays_Segment], axis=0), lambda: finishedRays, ) if b_usingArcs: finishedArcMask = tf.greater_equal( rayArcIndices, opticalArcCount, name="finishedArcMask" ) faMask = tf.logical_and(finishedArcMask, chooseArc) finishedRays_Arc = tf.boolean_mask(reactedRays_Arc, faMask) finishedRays = tf.cond( tf.reduce_any(faMask), lambda: tf.concat([finishedRays, finishedRays_Arc], axis=0), lambda: finishedRays, ) # conjugate to finished rays with tf.name_scope("reactedRays") as scope: reactedRays = tf.zeros([0, 6], dtype=tf.float64) if b_usingSegments: chooseSegment = tf.logical_and( tf.logical_not(finishedSegmentMask), chooseSegment ) reactedRays_Segment = tf.boolean_mask( reactedRays_Segment, chooseSegment, name="reactedRays_Segment" ) boundarySegments = tf.boolean_mask( boundarySegments, chooseSegment, name="boundarySegments" ) reactedRays = tf.cond( tf.reduce_any(chooseSegment), lambda: tf.concat([reactedRays, reactedRays_Segment], axis=0), lambda: reactedRays, ) if b_usingArcs: chooseArc = tf.logical_and(tf.logical_not(finishedArcMask), chooseArc) reactedRays_Arc = tf.boolean_mask( reactedRays_Arc, chooseArc, name="reactedRays_Arc" ) arcV = tf.boolean_mask(arcV, chooseArc, name="arcV") boundaryArcs = tf.boolean_mask( boundaryArcs, chooseArc, name="boundaryArcs" ) reactedRays = tf.cond( tf.reduce_any(chooseArc), lambda: tf.concat([reactedRays, reactedRays_Arc], axis=0), lambda: reactedRays, ) # calculate the norm of the surface with tf.name_scope("norm") as scope: norm = tf.zeros([0], dtype=tf.float64) if b_usingSegments: normSegment = ( tf.atan2( boundarySegments[:, 3] - boundarySegments[:, 1], boundarySegments[:, 2] - boundarySegments[:, 0], name="normSegment", ) + PI / 2 ) norm = tf.cond( tf.reduce_any(chooseSegment), lambda: tf.concat([norm, normSegment], axis=0), lambda: norm, ) if b_usingArcs: normArc = tf.where( tf.less(boundaryArcs[:, 4], 0), arcV + PI, arcV, name="normArc" ) normArc = tf.floormod(normArc, 2 * PI) norm = tf.cond( tf.reduce_any(chooseArc), lambda: tf.concat([norm, normArc], axis=0), lambda: norm, ) with tf.name_scope("refractiveIndex") as scope: # calculate the refractive index for every material and ray wavelengths = reactedRays[:, 4] nstack = tf.stack( [each(wavelengths) for each in materials], axis=1, name="nstack" ) rayRange = 
tf.range( tf.shape(reactedRays)[0], dtype=tf.int32, name="rayRange" ) # select just the correct entry for n_in and n_out if b_usingSegments and b_usingArcs: n_in_indices = tf.concat( [boundarySegments[:, 4], boundaryArcs[:, 5]], axis=0, name="n_in_indices", ) else: if b_usingSegments: n_in_indices = boundarySegments[:, 4] if b_usingArcs: n_in_indices = boundaryArcs[:, 5] n_in_indices = tf.cast(n_in_indices, tf.int32) n_in_indices = tf.transpose(tf.stack([rayRange, n_in_indices])) n_in = tf.gather_nd(nstack, n_in_indices, name="n_in") if b_usingSegments and b_usingArcs: n_out_indices = tf.concat( [boundarySegments[:, 5], boundaryArcs[:, 6]], axis=0, name="n_out_indices", ) else: if b_usingSegments: n_out_indices = boundarySegments[:, 5] if b_usingArcs: n_out_indices = boundaryArcs[:, 6] n_out_indices = tf.cast(n_out_indices, tf.int32) n_out_indices = tf.transpose(tf.stack([rayRange, n_out_indices])) n_out = tf.gather_nd(nstack, n_out_indices, name="n_out") activeRays = react(reactedRays, norm, n_in, n_out) return reactedRays, activeRays, finishedRays, deadRays
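# Small NumPy check (illustrative geometry) of the ray/segment parameterisation used in the
# segment-intersection block above: solving A1 + u*(A2 - A1) = B1 + v*(B2 - B1) for a ray A and
# a segment B gives the closed-form u and v below; u measures distance along the ray, v the
# position along the segment, and a zero denominator means the lines are parallel.
import numpy as np

xa1, ya1, xa2, ya2 = 0.0, 0.0, 1.0, 0.0   # ray along the +x axis
xb1, yb1, xb2, yb2 = 2.0, -1.0, 2.0, 1.0  # vertical segment at x = 2

xa, ya = xa2 - xa1, ya2 - ya1
xb, yb = xb2 - xb1, yb2 - yb1
denominator = xa * yb - ya * xb

v = (ya * (xb1 - xa1) - xa * (yb1 - ya1)) / denominator
u = (xb * (ya1 - yb1) - yb * (xa1 - xb1)) / denominator
assert np.isclose(u, 2.0) and np.isclose(v, 0.5)  # hit at (2, 0), halfway along the segment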
def call(self, inputs, training=True): _zero = tf.constant(0.0, dtype='float32') _nan = tf.constant(0.0, dtype='float32') s = inputs.shape tstim = tf.where(tf.math.is_nan(inputs), _zero, inputs) if self.x0 is not None: # x0 should be tf variable to avoid retraces # TODO: is this expanding along the right dim? tstim dims: (None, time, chans) tstim = tstim - tf.expand_dims(self.x0, axis=1) # convert a & tau units from sec to bins ui = tf.math.abs(tf.reshape(self.u, (1, -1))) / self.fs * 100 taui = tf.math.abs(tf.reshape(self.tau, (1, -1))) * self.fs # convert chunksize from sec to bins chunksize = 5 chunksize = int(chunksize * self.fs) if self.crosstalk: # assumes dim of u is 1 ! tstim = tf.math.reduce_mean(tstim, axis=0, keepdims=True) ui = tf.expand_dims(ui, axis=0) taui = tf.expand_dims(taui, axis=0) @tf.function def _cumtrapz(x, dx=1., initial=0.): x = (x[:, :-1] + x[:, 1:]) / 2.0 x = tf.pad(x, ((0, 0), (1, 0), (0, 0)), constant_values=initial) return tf.cumsum(x, axis=1) * dx a = tf.cast(1.0 / taui, 'float64') x = ui * tstim if self.reset_signal is None: reset_times = tf.range(0, s[1] + chunksize - 1, chunksize) else: reset_times = tf.where(self.reset_signal[0, :])[:, 0] reset_times = tf.pad(reset_times, ((0, 1),), constant_values=s[1]) td = [] x0, imu0 = 0.0, 0.0 for j in range(reset_times.shape[0] - 1): xi = tf.cast(x[:, reset_times[j]:reset_times[j + 1], :], 'float64') ix = _cumtrapz(a + xi, dx=1, initial=0) + a + (x0 + xi[:, :1]) / 2.0 mu = tf.exp(ix) imu = _cumtrapz(mu * xi, dx=1, initial=0) + (x0 + mu[:, :1] * xi[:, :1]) / 2.0 + imu0 valid = tf.logical_and(mu > 0.0, imu > 0.0) mu = tf.where(valid, mu, 1.0) imu = tf.where(valid, imu, 1.0) _td = 1 - tf.exp(tf.math.log(imu) - tf.math.log(mu)) _td = tf.where(valid, _td, 1.0) x0 = xi[:, -1:] imu0 = imu[:, -1:] / mu[:, -1:] td.append(tf.cast(_td, 'float32')) td = tf.concat(td, axis=1) #ret = tstim * td # offset depression by one to allow transients ret = tstim * tf.pad(td[:, :-1, :], ((0, 0), (1, 0), (0, 0)), constant_values=1.0) ret = tf.where(tf.math.is_nan(inputs), _nan, ret) return ret
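# Minimal NumPy sketch (2-D batch x time, uniform spacing) of the _cumtrapz helper above:
# adjacent samples are averaged, a leading `initial` value is padded in, and the cumulative
# sum times dx gives the running trapezoidal integral along the time axis.
import numpy as np

def cumtrapz_np(x, dx=1.0, initial=0.0):
    mid = (x[:, :-1] + x[:, 1:]) / 2.0
    mid = np.pad(mid, ((0, 0), (1, 0)), constant_values=initial)
    return np.cumsum(mid, axis=1) * dx

f = np.array([[0.0, 1.0, 2.0, 3.0]])  # integrate f(t) = t on t = 0..3
print(cumtrapz_np(f))                 # -> [[0.  0.5 2.  4.5]], i.e. t**2 / 2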
def cond(i, base_state, high_states, prev_y, prev_emb, y_array): return tf.logical_and(tf.less(i, decoder.translation_maxlen), tf.reduce_any(tf.not_equal(prev_y, 0)))
def get_random_data(image, xmins, xmaxs, ymins, ymaxs, labels, input_shape, min_scale=0.25, max_scale=2, jitter=0.3, min_gamma=0.8, max_gamma=2, blur=False, flip=True, hue=.5, sat=.5, val=0., cont=.1, noise=0, max_boxes=20, min_jpeg_quality=80, max_jpeg_quality=100, train: bool = True): '''random preprocessing for real-time data augmentation''' input_shape=tf.keras.backend.get_value(input_shape) iw, ih = tf.cast(tf.shape(image)[1], tf.float32), tf.cast(tf.shape(image)[0], tf.float32) w, h = tf.cast(input_shape[1], tf.float32), tf.cast(input_shape[0], tf.float32) xmaxs = tf.expand_dims(xmaxs, 0) xmins = tf.expand_dims(xmins, 0) ymaxs = tf.expand_dims(ymaxs, 0) ymins = tf.expand_dims(ymins, 0) labels = tf.expand_dims(labels, 0) if train: new_ar = (w / h) * (tf.random.uniform([], 1 - jitter, 1 + jitter) / tf.random.uniform([], 1 - jitter, 1 + jitter)) scale = tf.random.uniform([], min_scale, max_scale) ratio = tf.cond(tf.less( new_ar, 1), lambda: scale * new_ar, lambda: scale / new_ar) ratio = tf.maximum(ratio, 1) nw, nh = tf.cond(tf.less( new_ar, 1), lambda: (ratio * h, scale * h), lambda: (scale * w, ratio * w)) dx = tf.random.uniform([], 0, w - nw) dy = tf.random.uniform([], 0, h - nh) image = tf.image.resize(image, [tf.cast(nh, tf.int32), tf.cast(nw, tf.int32)]) def crop_and_pad(image, dx, dy): dy = tf.cast(tf.math.maximum(-dy, 0), tf.int32) dx = tf.cast(tf.math.maximum(-dx, 0), tf.int32) image = tf.image.crop_to_bounding_box( image, dy, dx, tf.math.minimum(tf.cast(h, tf.int32), tf.cast(nh, tf.int32)), tf.math.minimum(tf.cast(w, tf.int32), tf.cast(nw, tf.int32))) image = tf.image.pad_to_bounding_box(image, 0, 0, tf.cast(h, tf.int32), tf.cast(w, tf.int32)) return image new_image = tf.cond( tf.greater(scale, 1), lambda: crop_and_pad(image, dx, dy), lambda: tf.image .pad_to_bounding_box(image, tf.cast(tf.math.maximum( dy, 0), tf.int32), tf.cast(tf.math.maximum(dx, 0), tf.int32), tf.cast(h, tf.int32), tf.cast(w, tf.int32))) image_color_padded = tf.cast(tf.equal(new_image, 0), tf.float32) * (128 / 255) image = image_color_padded + new_image xmins = xmins * nw / iw + dx xmaxs = xmaxs * nw / iw + dx ymins = ymins * nh / ih + dy ymaxs = ymaxs * nh / ih + dy if flip: image, xmins, xmaxs = tf.cond( tf.less( tf.random.uniform([]), 0.5), lambda: (tf.image.flip_left_right(image), w - xmaxs, w - xmins), lambda: (image, xmins, xmaxs)) if hue > 0: image = tf.image.random_hue(image, hue) if sat > 1: image = tf.image.random_saturation(image, 1 - sat, 1 + sat) if val > 0: image = tf.image.random_brightness(image, val) if min_gamma < max_gamma: image = random_gamma(image, min_gamma, max_gamma) if cont > 1: image = tf.image.random_contrast(image, 1 - cont, 1 + cont) if min_jpeg_quality < max_jpeg_quality: image = tf.image.random_jpeg_quality(image, min_jpeg_quality, max_jpeg_quality) if noise > 0: image = image + tf.cast( tf.random.uniform(shape=[input_shape[1], input_shape[0], 3], minval=0, maxval=noise), tf.float32) if blur: image = random_blur(image) else: nh = ih * tf.minimum(w / iw, h / ih) nw = iw * tf.minimum(w / iw, h / ih) dx = (w - nw) / 2 dy = (h - nh) / 2 image = tf.image.resize(image, [tf.cast(nh, tf.int32), tf.cast(nw, tf.int32)]) new_image = tf.image.pad_to_bounding_box(image, tf.cast(dy, tf.int32), tf.cast(dx, tf.int32), tf.cast(h, tf.int32), tf.cast(w, tf.int32)) image_color_padded = tf.cast(tf.equal(new_image, 0), tf.float32) * (128 / 255) image = image_color_padded + new_image xmins = xmins * nw / iw + dx xmaxs = xmaxs * nw / iw + dx ymins = ymins * nh / ih + dy ymaxs = ymaxs * nh / ih + dy 
bbox = tf.concat([xmins, ymins, xmaxs, ymaxs, tf.cast(labels, tf.float32)], 0) bbox = tf.transpose(bbox, [1, 0]) image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0) bbox = tf.clip_by_value(bbox, clip_value_min=0, clip_value_max=tf.cast(input_shape[0] - 1, tf.float32)) bbox_w = bbox[..., 2] - bbox[..., 0] bbox_h = bbox[..., 3] - bbox[..., 1] bbox = tf.boolean_mask(bbox, tf.logical_and(bbox_w > 1, bbox_h > 1)) bbox = tf.cond(tf.greater( tf.shape(bbox)[0], max_boxes), lambda: bbox[:max_boxes], lambda: bbox) return image, bbox
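# Stand-alone sketch (TF2-style eager ops, toy boxes) of the final filtering above: boxes that
# end up narrower or shorter than one pixel after augmentation are dropped, and at most
# max_boxes boxes are kept.
import tensorflow as tf

max_boxes = 20
bbox = tf.constant([[10., 10., 50., 40., 1.],    # valid box
                    [20., 20., 20.5, 60., 2.]])  # degenerate: width < 1 pixel
bbox_w = bbox[..., 2] - bbox[..., 0]
bbox_h = bbox[..., 3] - bbox[..., 1]
bbox = tf.boolean_mask(bbox, tf.logical_and(bbox_w > 1, bbox_h > 1))
bbox = tf.cond(tf.greater(tf.shape(bbox)[0], max_boxes), lambda: bbox[:max_boxes], lambda: bbox)
print(bbox)  # only the first box survives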
def cond(i, prev_base_states, prev_high_states, prev_ys, prev_embs, cost, ys_array, p_array): return tf.logical_and(tf.less(i, translation_maxlen), tf.reduce_any(tf.not_equal(prev_ys, 0)))
def _not_boundary(trajectories, _): condition_1 = ~trajectories.is_boundary()[0] condition_2 = ~( tf.logical_and(~trajectories.is_last()[0], trajectories.is_first()[1]) ) # to be on the safe side... return tf.logical_and(condition_1, condition_2)
def main(_): with tf.Session() as sess: #tf.set_random_seed(4285) epochs = 1 batch_size = 3 # must divide dataset size (some strange error occurs if not) image_size = 128 tfrecords_file_in = '/data/cvg/lukas/datasets/coco/2017_training/tfrecords_l2mix_flip_tile_10-L2nn_4285/181115/' # '../data/train-00011-of-00060.tfrecords' filedir_out_base = '../logs/prepare_tensors_for_CLS' # tile_filedir_in = '/data/cvg/lukas/datasets/coco/2017_training/clustering_224x224_4285/' # tile_filedir_out = '~/results/knn_results/' path_tile_base = tf.constant( "/data/cvg/lukas/datasets/coco/2017_training/clustering_224x224_4285/" ) reader = tf.TFRecordReader() read_fn = lambda name: read_record(name, reader, image_size) # filename, train_images, t1_10nn_ids, t2_10nn_ids, t3_10nn_ids, t4_10nn_ids, t1_10nn_subids, t2_10nn_subids, t3_10nn_subids, t4_10nn_subids = get_pipeline(tfrecords_file_in, batch_size, epochs, read_fn) filenames, train_images, t1_10nn_ids, t1_10nn_subids, t1_10nn_L2, t2_10nn_ids, t2_10nn_subids, t2_10nn_L2, t3_10nn_ids, t3_10nn_subids, t3_10nn_L2, t4_10nn_ids, t4_10nn_subids, t4_10nn_L2 = \ get_pipeline(tfrecords_file_in, batch_size, epochs, read_fn) images_I_ref = train_images print('t1_10nn_ids ', t1_10nn_ids) t1_10nn_ids = tf.reshape(tf.sparse.to_dense(t1_10nn_ids), (batch_size, -1)) print('t1_10nn_ids ', t1_10nn_ids) t1_10nn_L2 = tf.reshape(tf.sparse.to_dense(t1_10nn_L2), (batch_size, -1)) print('t1_10nn_L2 ', t1_10nn_L2) t1_10nn_subids = tf.reshape(tf.sparse.to_dense(t1_10nn_subids), (batch_size, -1)) t2_10nn_ids = tf.reshape(tf.sparse.to_dense(t2_10nn_ids), (batch_size, -1)) t2_10nn_L2 = tf.reshape(tf.sparse.to_dense(t2_10nn_L2), (batch_size, -1)) t2_10nn_subids = tf.reshape(tf.sparse.to_dense(t2_10nn_subids), (batch_size, -1)) t3_10nn_ids = tf.reshape(tf.sparse.to_dense(t3_10nn_ids), (batch_size, -1)) t3_10nn_subids = tf.reshape(tf.sparse.to_dense(t3_10nn_subids), (batch_size, -1)) t3_10nn_L2 = tf.reshape(tf.sparse.to_dense(t3_10nn_L2), (batch_size, -1)) t4_10nn_ids = tf.reshape(tf.sparse.to_dense(t4_10nn_ids), (batch_size, -1)) t4_10nn_subids = tf.reshape(tf.sparse.to_dense(t4_10nn_subids), (batch_size, -1)) t4_10nn_L2 = tf.reshape(tf.sparse.to_dense(t4_10nn_L2), (batch_size, -1)) nn_id = tf.random_uniform([batch_size], 0, 9, dtype=tf.int32) tile_size = image_size / 2 assert tile_size.is_integer() tile_size = int(tile_size) underscore = tf.constant("_") # t1 ############################################################################################ path_prefix_t1 = path_tile_base + tf.constant("t1/") filetype = tf.constant("_t1.jpg") for id in range(batch_size): t1_10nn_ids_b = t1_10nn_ids[id] index = nn_id[id] t1_10nn_id = tf.gather(t1_10nn_ids_b, index) t1_10nn_id_str = tf.as_string(t1_10nn_id) t1_10nn_subids_b = t1_10nn_subids[id] t1_10nn_subid = tf.gather(t1_10nn_subids_b, index) t1_10nn_subid_str = tf.as_string(t1_10nn_subid) postfix = underscore + t1_10nn_subid_str + filetype fname = get_filename(t1_10nn_id_str, postfix) t1_10nn_fnames = fname if id == 0 else tf.concat( axis=0, values=[t1_10nn_fnames, fname]) with tf.control_dependencies([ tf.assert_equal(batch_size, t1_10nn_fnames.shape[0]), tf.assert_equal(tf.strings.length(t1_10nn_fnames), 21) ]): print(t1_10nn_fnames.shape) t1_10nn_fnames = tf.strings.join([path_prefix_t1, t1_10nn_fnames]) print('<<<<<<<<<<<<<<<<<<<') print(t1_10nn_fnames.shape) print('<<<<<<<<<<<<<<<<<<<') print('t1_10nn_fnames.shape: %s' % str(t1_10nn_fnames.shape)) for id in range(batch_size): file = tf.read_file(t1_10nn_fnames[id]) print(file) file = 
tf.image.decode_jpeg(file) file = resize_img(file, tile_size, batch_size) file = tf.expand_dims(file, 0) t1_10nn_images = file if id == 0 else tf.concat( axis=0, values=[t1_10nn_images, file]) print('train_images.shape..:', train_images.shape) print('t1_10nn_images.shape:', t1_10nn_images.shape) # t2 ############################################################################################ path_prefix_t2 = path_tile_base + tf.constant("t2/") filetype = tf.constant("_t2.jpg") for id in range(batch_size): t2_10nn_ids_b = t2_10nn_ids[id] index = nn_id[id] t2_10nn_id = tf.gather(t2_10nn_ids_b, index) t2_10nn_id_str = tf.as_string(t2_10nn_id) t2_10nn_subids_b = t2_10nn_subids[id] t2_10nn_subid = tf.gather(t2_10nn_subids_b, index) t2_10nn_subid_str = tf.as_string(t2_10nn_subid) postfix = underscore + t2_10nn_subid_str + filetype fname = get_filename(t2_10nn_id_str, postfix) t2_10nn_fnames = fname if id == 0 else tf.concat( axis=0, values=[t2_10nn_fnames, fname]) with tf.control_dependencies([ tf.assert_equal(batch_size, t2_10nn_fnames.shape[0]), tf.assert_equal(tf.strings.length(t2_10nn_fnames), 21) ]): print(t2_10nn_fnames.shape) t2_10nn_fnames = tf.strings.join([path_prefix_t2, t2_10nn_fnames]) print('<<<<<<<<<<<<<<<<<<<') print(t2_10nn_fnames.shape) print('<<<<<<<<<<<<<<<<<<<') print('t2_10nn_fnames.shape: %s' % str(t2_10nn_fnames.shape)) for id in range(batch_size): file = tf.read_file(t2_10nn_fnames[id]) print(file) file = tf.image.decode_jpeg(file) file = resize_img(file, tile_size, batch_size) file = tf.expand_dims(file, 0) t2_10nn_images = file if id == 0 else tf.concat( axis=0, values=[t2_10nn_images, file]) print('train_images.shape..:', train_images.shape) print('t2_10nn_images.shape:', t2_10nn_images.shape) # t3 ############################################################################################ path_prefix_t3 = path_tile_base + tf.constant("t3/") filetype = tf.constant("_t3.jpg") for id in range(batch_size): t3_10nn_ids_b = t3_10nn_ids[id] index = nn_id[id] t3_10nn_id = tf.gather(t3_10nn_ids_b, index) t3_10nn_id_str = tf.as_string(t3_10nn_id) t3_10nn_subids_b = t3_10nn_subids[id] t3_10nn_subid = tf.gather(t3_10nn_subids_b, index) t3_10nn_subid_str = tf.as_string(t3_10nn_subid) postfix = underscore + t3_10nn_subid_str + filetype fname = get_filename(t3_10nn_id_str, postfix) t3_10nn_fnames = fname if id == 0 else tf.concat( axis=0, values=[t3_10nn_fnames, fname]) with tf.control_dependencies([ tf.assert_equal(batch_size, t3_10nn_fnames.shape[0]), tf.assert_equal(tf.strings.length(t3_10nn_fnames), 21) ]): print(t3_10nn_fnames.shape) t3_10nn_fnames = tf.strings.join([path_prefix_t3, t3_10nn_fnames]) print('<<<<<<<<<<<<<<<<<<<') print(t3_10nn_fnames.shape) print('<<<<<<<<<<<<<<<<<<<') print('t3_10nn_fnames.shape: %s' % str(t3_10nn_fnames.shape)) for id in range(batch_size): file = tf.read_file(t3_10nn_fnames[id]) print(file) file = tf.image.decode_jpeg(file) file = resize_img(file, tile_size, batch_size) file = tf.expand_dims(file, 0) t3_10nn_images = file if id == 0 else tf.concat( axis=0, values=[t3_10nn_images, file]) print('train_images.shape..:', train_images.shape) print('t3_10nn_images.shape:', t3_10nn_images.shape) # t4 ############################################################################################ path_prefix_t4 = path_tile_base + tf.constant("t4/") filetype = tf.constant("_t4.jpg") for id in range(batch_size): t4_10nn_ids_b = t4_10nn_ids[id] index = nn_id[id] t4_10nn_id = tf.gather(t4_10nn_ids_b, index) t4_10nn_id_str = tf.as_string(t4_10nn_id) 
t4_10nn_subids_b = t4_10nn_subids[id] t4_10nn_subid = tf.gather(t4_10nn_subids_b, index) t4_10nn_subid_str = tf.as_string(t4_10nn_subid) postfix = underscore + t4_10nn_subid_str + filetype fname = get_filename(t4_10nn_id_str, postfix) t4_10nn_fnames = fname if id == 0 else tf.concat( axis=0, values=[t4_10nn_fnames, fname]) with tf.control_dependencies([ tf.assert_equal(batch_size, t4_10nn_fnames.shape[0]), tf.assert_equal(tf.strings.length(t4_10nn_fnames), 21) ]): print(t4_10nn_fnames.shape) t4_10nn_fnames = tf.strings.join([path_prefix_t4, t4_10nn_fnames]) print('<<<<<<<<<<<<<<<<<<<') print(t4_10nn_fnames.shape) print('<<<<<<<<<<<<<<<<<<<') print('t4_10nn_fnames.shape: %s' % str(t4_10nn_fnames.shape)) for id in range(batch_size): file = tf.read_file(t4_10nn_fnames[id]) print(file) file = tf.image.decode_jpeg(file) file = resize_img(file, tile_size, batch_size) file = tf.expand_dims(file, 0) t4_10nn_images = file if id == 0 else tf.concat( axis=0, values=[t4_10nn_images, file]) print('train_images.shape..:', train_images.shape) print('t4_10nn_images.shape:', t4_10nn_images.shape) # ########################################################################################################### # ########################################################################################################### I_ref_t1 = tf.image.crop_to_bounding_box(images_I_ref, 0, 0, tile_size, tile_size) I_ref_t2 = tf.image.crop_to_bounding_box(images_I_ref, 0, tile_size, tile_size, tile_size) I_ref_t3 = tf.image.crop_to_bounding_box(images_I_ref, tile_size, 0, tile_size, tile_size) I_ref_t4 = tf.image.crop_to_bounding_box(images_I_ref, tile_size, tile_size, tile_size, tile_size) # replace tile w/ max L2 wrt I_ref w/ respective tile of I_ref tau = 16000 for id in range(batch_size): index = nn_id[id] t1_10nn_L2_b = tf.gather(t1_10nn_L2[id], index) t2_10nn_L2_b = tf.gather(t2_10nn_L2[id], index) t3_10nn_L2_b = tf.gather(t3_10nn_L2[id], index) t4_10nn_L2_b = tf.gather(t4_10nn_L2[id], index) all_L2 = tf.stack(axis=0, values=[ t1_10nn_L2_b, t2_10nn_L2_b, t3_10nn_L2_b, t4_10nn_L2_b ]) argmax_L2 = tf.argmax(tf.reshape(all_L2, [-1]), axis=0) argmin_L2 = tf.argmin(tf.reshape(all_L2, [-1]), axis=0) # pick I_ref_t1 IFF t1 is argmax L2 or L2 > TAU and t1 is not argmin L2 is_t1_maxL2 = tf.equal(argmax_L2, 0) is_t1_minL2 = tf.equal(argmin_L2, 0) cond_Iref_t1 = tf.logical_and( tf.logical_or(is_t1_maxL2, tf.greater(t1_10nn_L2_b, tau)), tf.logical_not(is_t1_minL2)) cond_Iref_t1_s = tf.expand_dims( cond_Iref_t1, 0) if id == 0 else tf.concat( axis=0, values=[cond_Iref_t1_s, tf.expand_dims(cond_Iref_t1, 0)]) tile_1 = tf.expand_dims( tf.where(cond_Iref_t1, I_ref_t1[id], t1_10nn_images[id]), 0) assignment_1 = tf.where(cond_Iref_t1, 0, 1) J_1_tile = tile_1 if id == 0 else tf.concat( axis=0, values=[J_1_tile, tile_1]) is_t2_maxL2 = tf.equal(argmax_L2, 1) is_t2_minL2 = tf.equal(argmin_L2, 1) cond_Iref_t2 = tf.logical_and( tf.logical_or(is_t2_maxL2, tf.greater(t2_10nn_L2_b, tau)), tf.logical_not(is_t2_minL2)) cond_Iref_t2_s = tf.expand_dims( cond_Iref_t2, 0) if id == 0 else tf.concat( axis=0, values=[cond_Iref_t2_s, tf.expand_dims(cond_Iref_t2, 0)]) tile_2 = tf.expand_dims( tf.where(cond_Iref_t2, I_ref_t2[id], t2_10nn_images[id]), 0) assignment_2 = tf.where(cond_Iref_t2, 0, 1) J_2_tile = tile_2 if id == 0 else tf.concat( axis=0, values=[J_2_tile, tile_2]) is_t3_maxL2 = tf.equal(argmax_L2, 2) is_t3_minL2 = tf.equal(argmin_L2, 2) cond_Iref_t3 = tf.logical_and( tf.logical_or(is_t3_maxL2, tf.greater(t3_10nn_L2_b, tau)), 
tf.logical_not(is_t3_minL2)) cond_Iref_t3_s = tf.expand_dims( cond_Iref_t3, 0) if id == 0 else tf.concat( axis=0, values=[cond_Iref_t3_s, tf.expand_dims(cond_Iref_t3, 0)]) tile_3 = tf.expand_dims( tf.where(cond_Iref_t3, I_ref_t3[id], t3_10nn_images[id]), 0) assignment_3 = tf.where(cond_Iref_t3, 0, 1) J_3_tile = tile_3 if id == 0 else tf.concat( axis=0, values=[J_3_tile, tile_3]) is_t4_maxL2 = tf.equal(argmax_L2, 3) is_t4_minL2 = tf.equal(argmin_L2, 3) cond_Iref_t4 = tf.logical_and( tf.logical_or(is_t4_maxL2, tf.greater(t4_10nn_L2_b, tau)), tf.logical_not(is_t4_minL2)) cond_Iref_t4_s = tf.expand_dims( cond_Iref_t4, 0) if id == 0 else tf.concat( axis=0, values=[cond_Iref_t4_s, tf.expand_dims(cond_Iref_t4, 0)]) tile_4 = tf.expand_dims( tf.where(cond_Iref_t4, I_ref_t4[id], t4_10nn_images[id]), 0) assignment_4 = tf.where(cond_Iref_t4, 0, 1) J_4_tile = tile_4 if id == 0 else tf.concat( axis=0, values=[J_4_tile, tile_4]) assignments = tf.stack(axis=0, values=[ assignment_1, assignment_2, assignment_3, assignment_4 ]) assignments = tf.reshape(assignments, [-1]) assignments = tf.expand_dims(assignments, 0) assignments_actual = assignments if id == 0 else tf.concat( axis=0, values=[assignments_actual, assignments]) assert J_1_tile.shape[0] == batch_size assert J_1_tile.shape[1] == tile_size assert J_1_tile.shape[2] == tile_size assert J_1_tile.shape[3] == 3 assert J_1_tile.shape == J_2_tile.shape assert J_2_tile.shape == J_3_tile.shape assert J_2_tile.shape == J_4_tile.shape assert assignments_actual.shape[0] == batch_size assert assignments_actual.shape[1] == 4 # [('000000000927_1.jpg', 0.03125), ('000000568135_2.jpg', 19095.953), ('000000187857_1.jpg', 23359.39), # ('000000521998_2.jpg', 23557.688), ('000000140816_1.jpg', 24226.852), ('000000015109_1.jpg', 25191.469), # ('000000525567_1.jpg', 25484.93), ('000000377422_1.jpg', 25654.125), ('000000269815_2.jpg', 26794.836), # ('000000345617_2.jpg', 26872.812)] ######################################################################################################## sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess, coord=coord) max_batches = 1 cnt_batches = 0 max_iterations = batch_size timef = datetime.now().strftime('%Y%m%d_%H%M%S') filedir_out_base = os.path.join(filedir_out_base, timef) os.makedirs(filedir_out_base, exist_ok=True) try: while not coord.should_stop(): # r, s = sess.run([t1_10nn_ids, t1_10nn_subids]) # print(r) # print(s) print('assignments_actual.shape: ', assignments_actual.shape) img_ref, aa, inds, t1l2l, t2l2l, t3l2l, t4l2l, t1ids, t2ids, t3ids, t4ids, c1, c2, c3, c4, f1,f2,f3,f4,fr, \ t1_img, t2_img, t3_img, t4_img, J1t,J2t,J3t,J4t,I1,I2,I3,I4 = sess.run([images_I_ref, assignments_actual, nn_id, t1_10nn_L2, t2_10nn_L2, t3_10nn_L2, t4_10nn_L2, \ t1_10nn_ids, t2_10nn_ids, t3_10nn_ids, t4_10nn_ids, \ cond_Iref_t1_s, cond_Iref_t2_s, cond_Iref_t3_s, cond_Iref_t4_s, \ t1_10nn_fnames, t2_10nn_fnames, t3_10nn_fnames, t4_10nn_fnames, filenames, \ t1_10nn_images, t2_10nn_images, t3_10nn_images, t4_10nn_images, \ J_1_tile, J_2_tile, J_3_tile, J_4_tile, \ I_ref_t1, I_ref_t2, I_ref_t3, I_ref_t4]) cnt_iterations = 0 for i in range(batch_size): print('ITERATION [%d] >>>>>>' % i) print( '****************************************************************************************************************************************' ) print('assignments_actual:') print(aa[i]) print('index:') print(inds[i]) print('t1_10nn_ids:') print(t1ids[i]) 
print('t2_10nn_ids:') print(t2ids[i]) print('t3_10nn_ids:') print(t3ids[i]) print('t4_10nn_ids:') print(t4ids[i]) print('t1_10nn_L2:') print(t1l2l[i]) print('t2_10nn_L2:') print(t2l2l[i]) print('t3_10nn_L2:') print(t3l2l[i]) print('t4_10nn_L2:') print(t4l2l[i]) print('t1_10nn_L2 selected:') print(t1l2l[i][inds[i]]) print('t2_10nn_L2 selected:') print(t2l2l[i][inds[i]]) print('t3_10nn_L2 selected:') print(t3l2l[i][inds[i]]) print('t4_10nn_L2 selected:') print(t4l2l[i][inds[i]]) print('condition: %s - %s - %s - %s' % (str(c1[i]), str(c2[i]), str(c3[i]), str(c4[i]))) print(fr[i].decode("utf-8")) print(f1[i].decode("utf-8")) print(f2[i].decode("utf-8")) print(f3[i].decode("utf-8")) print(f4[i].decode("utf-8")) print( '****************************************************************************************************************************************' ) t_img = img_ref[i] frn = fr[i].decode("utf-8") name = os.path.join(filedir_out_base, ('%s_I_ref_' + frn) % i) print('save I_ref to %s...' % name) imsave(name, t_img) # save_to_file(f1, filedir_out_base, i, t1_img) # save_to_file(f2, filedir_out_base, i, t2_img) # save_to_file(f3, filedir_out_base, i, t3_img) # save_to_file(f4, filedir_out_base, i, t4_img) grid_size = np.ceil(np.sqrt(batch_size)) grid = [grid_size, grid_size] t_imgs = np.stack( (t1_img[i], t2_img[i], t3_img[i], t4_img[i])) assert t_imgs.shape[0] == 4 assert t_imgs.shape[1] == 64 assert t_imgs.shape[2] == 64 assert t_imgs.shape[3] == 3 save_images( t_imgs, grid, os.path.join( filedir_out_base, '%s_I_ref_t1-t4_%s.jpg' % (i, ''.join(str(e) for e in aa[i])))) t_imgs = np.stack((J1t[i], J2t[i], J3t[i], J4t[i])) assert t_imgs.shape[0] == 4 assert t_imgs.shape[1] == 64 assert t_imgs.shape[2] == 64 assert t_imgs.shape[3] == 3 save_images( t_imgs, grid, os.path.join( filedir_out_base, '%s_I_M_%s.jpg' % (i, ''.join(str(e) for e in aa[i])))) print('variance:') print(np.var(J1t[i])) print(np.var(J2t[i])) print(np.var(J3t[i])) print(np.var(J4t[i])) print('ITERATION [%d] <<<<<<' % i) cnt_iterations = cnt_iterations + 1 if cnt_iterations >= max_iterations: break cnt_batches = cnt_batches + 1 if cnt_batches >= max_batches: break except Exception as e: if hasattr( e, 'message' ) and 'is closed and has insufficient elements' in e.message: print('Done training -- epoch limit reached') else: print('Exception here, ending training..') tb = traceback.format_exc() print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>') print(e) print(tb) print('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<') finally: # When done, ask the threads to stop. coord.request_stop() coord.join(threads)
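# Toy NumPy sketch (made-up L2 values) of the tile-replacement rule used above: the selected
# nearest-neighbour tile k is swapped for the corresponding I_ref tile iff it has the largest
# L2 distance of the four tiles or exceeds tau, and it is not the closest tile.
import numpy as np

tau = 16000.0
l2 = np.array([0.03, 19095.9, 23359.4, 25654.1])  # L2 of the selected neighbour per tile
argmax_l2, argmin_l2 = np.argmax(l2), np.argmin(l2)
use_ref_tile = [((k == argmax_l2) or (l2[k] > tau)) and (k != argmin_l2) for k in range(4)]
print(use_ref_tile)  # -> [False, True, True, True]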
def call(self, x, training=False): if not training: training = tf.constant(False) training = tf.logical_and(training, self.trainable) return super().call(x, training)
def page_no_is_within(page_no, page_range): get_logger().debug('page_no: %s, page_range: %s', page_no, page_range) return tf.logical_and(page_no >= page_range[0], page_no <= page_range[1])
def interpolate2d(imgs, _x, _y): imgs_shape = imgs.get_shape() nbatch = int(imgs_shape[0]) height = int(imgs_shape[1]) width = int(imgs_shape[2]) nchannels = int(imgs_shape[3]) npixels = int(_x.get_shape()[1]) height_float = float(height) width_float = float(width) x = tf.reshape(_x, [-1]) y = tf.reshape(_y, [-1]) _x0 = tf.floor(x) _y0 = tf.floor(y) dx = x - _x0 dy = y - _y0 w00 = tf.reshape((1.0 - dx) * (1.0 - dy), [-1, 1, 1]) w01 = tf.reshape(dx * (1.0 - dy), [-1, 1, 1]) w10 = tf.reshape(((1.0 - dx) * dy), [-1, 1, 1]) w11 = tf.reshape(dx * dy, [-1, 1, 1]) base = tf.reshape( tf.tile(tf.expand_dims(tf.range(nbatch) * height * width, -1), [1, npixels]), [nbatch * npixels]) x0 = tf.cast(_x0, dtype=tf.int32) y0 = tf.cast(_y0, dtype=tf.int32) x1 = x0 + 1 y1 = y0 + 1 zero = tf.zeros([], dtype='int32') x0 = tf.clip_by_value(x0, zero, width - 1) x1 = tf.clip_by_value(x1, zero, width - 1) y0 = tf.clip_by_value(y0, zero, height - 1) y1 = tf.clip_by_value(y1, zero, height - 1) index00 = base + y0 * width + x0 index01 = base + y0 * width + x1 index10 = base + y1 * width + x0 index11 = base + y1 * width + x1 imgs_flat = tf.reshape(imgs, [nbatch * height * width, nchannels, 1]) I00 = tf.gather(imgs_flat, index00) I01 = tf.gather(imgs_flat, index01) I10 = tf.gather(imgs_flat, index10) I11 = tf.gather(imgs_flat, index11) output = tf.add_n([ tf.matmul(I00, w00), tf.matmul(I01, w01), tf.matmul(I10, w10), tf.matmul(I11, w11) ]) output = tf.reshape(output, [nbatch, npixels, nchannels]) cliped_x = tf.clip_by_value(_x, 0.0, width_float - 1.0) cliped_y = tf.clip_by_value(_y, 0.0, height_float - 1.0) mask = tf.expand_dims( tf.to_float( tf.logical_and(tf.equal(_x, cliped_x), tf.equal(_y, cliped_y))), -1) return output, mask
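# NumPy sketch (toy ramp image, arbitrary sample point) of the bilinear weighting used in
# interpolate2d above: for fractional offsets (dx, dy) inside a pixel cell the four corner
# weights sum to one, and interpolating an image that is linear in x and y is exact.
import numpy as np

img = np.arange(16, dtype=np.float64).reshape(4, 4)  # img[y, x] = 4*y + x
x, y = 1.25, 2.5
x0, y0 = int(np.floor(x)), int(np.floor(y))
dx, dy = x - x0, y - y0
w00, w01 = (1 - dx) * (1 - dy), dx * (1 - dy)
w10, w11 = (1 - dx) * dy, dx * dy
value = (w00 * img[y0, x0] + w01 * img[y0, x0 + 1] +
         w10 * img[y0 + 1, x0] + w11 * img[y0 + 1, x0 + 1])
assert np.isclose(w00 + w01 + w10 + w11, 1.0)
assert np.isclose(value, 4 * y + x)  # exact for a linear image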
def loop_cond(i, decodes_BxT, unused_cache_BxU_dict): finished_B = tf.reduce_any(tf.equal(decodes_BxT, EOS_ID), axis=1) return tf.logical_and(i < max_decode_len, tf.logical_not(tf.reduce_all(finished_B)))
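# Toy check (eager-style, illustrative decode matrix) of the stopping rule above: decoding
# continues while i < max_decode_len and at least one sequence has not yet emitted EOS_ID.
import tensorflow as tf

EOS_ID = 1
max_decode_len = 8
i = tf.constant(3)
decodes_BxT = tf.constant([[5, 3, 1, 0],
                           [5, 4, 2, 6]])  # second row has not produced EOS yet
finished_B = tf.reduce_any(tf.equal(decodes_BxT, EOS_ID), axis=1)
keep_going = tf.logical_and(i < max_decode_len, tf.logical_not(tf.reduce_all(finished_B)))
print(keep_going)  # -> True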
def Model(features, labels, mode, params): TRAIN = mode == tf.estimator.ModeKeys.TRAIN EVAL = mode == tf.estimator.ModeKeys.EVAL PREDICT = mode == tf.estimator.ModeKeys.PREDICT # 미리 정의된 임베딩 사용 유무를 확인 한다. # 값이 True이면 임베딩을 해서 학습하고 False이면 # onehotencoding 처리 한다. if params['embedding'] == True: # 가중치 행렬에 대한 초기화 함수이다. # xavier (Xavier Glorot와 Yoshua Bengio (2010) # URL : http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf initializer = tf.contrib.layers.xavier_initializer() # 인코딩 변수를 선언하고 값을 설정한다. embedding_encoder = tf.get_variable(name="embedding_encoder", # 이름 shape=[params['vocabulary_length'], params['embedding_size']], # 모양 dtype=tf.float32, # 타입 initializer=initializer, # 초기화 값 trainable=True) # 학습 유무 else: # tf.eye를 통해서 사전의 크기 만큼의 단위행렬 # 구조를 만든다. embedding_encoder = tf.eye(num_rows=params['vocabulary_length'], dtype=tf.float32) # 인코딩 변수를 선언하고 값을 설정한다. embedding_encoder = tf.get_variable(name="embedding_encoder", # 이름 initializer=embedding_encoder, # 초기화 값 trainable=False) # 학습 유무 # embedding_lookup을 통해서 features['input']의 인덱스를 # 위에서 만든 embedding_encoder의 인덱스의 값으로 변경하여 # 임베딩된 디코딩 배치를 만든다. embedding_encoder_batch = tf.nn.embedding_lookup(params=embedding_encoder, ids=features['input']) # 미리 정의된 임베딩 사용 유무를 확인 한다. # 값이 True이면 임베딩을 해서 학습하고 False이면 # onehotencoding 처리 한다. if params['embedding'] == True: # 가중치 행렬에 대한 초기화 함수이다. # xavier (Xavier Glorot와 Yoshua Bengio (2010) # URL : http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf initializer = tf.contrib.layers.xavier_initializer() # 디코딩 변수를 선언하고 값을 설정한다. embedding_decoder = tf.get_variable(name="embedding_decoder", # 이름 shape=[params['vocabulary_length'], params['embedding_size']], # 모양 dtype=tf.float32, # 타입 initializer=initializer, # 초기화 값 trainable=True) # 학습 유무 else: # tf.eye를 통해서 사전의 크기 만큼의 단위행렬 # 구조를 만든다. embedding_decoder = tf.eye(num_rows=params['vocabulary_length'], dtype=tf.float32) # 인코딩 변수를 선언하고 값을 설정한다. embedding_decoder = tf.get_variable(name='embedding_decoder', # 이름 initializer=embedding_decoder, # 초기화 값 trainable=False) # 학습 유무 # 변수 재사용을 위해서 reuse=.AUTO_REUSE를 사용하며 범위를 # 정해주고 사용하기 위해 scope설정을 한다. # make_lstm_cell이 "cell"반복적으로 호출 되면서 재사용된다. with tf.variable_scope('encoder_scope', reuse=tf.AUTO_REUSE): # 값이 True이면 멀티레이어로 모델을 구성하고 False이면 # 단일레이어로 모델을 구성 한다. if params['multilayer'] == True: # layerSize 만큼 LSTMCell을 encoder_cell_list에 담는다. encoder_cell_list = [make_lstm_cell(mode, params['hidden_size'], i) for i in range(params['layer_size'])] # MUltiLayer RNN CEll에 encoder_cell_list를 넣어 멀티 레이어를 만든다. rnn_cell = tf.contrib.rnn.MultiRNNCell(encoder_cell_list, state_is_tuple=False) else: # 단층 LSTMLCell을 만든다. rnn_cell = make_lstm_cell(mode, params['hidden_size'], "") # rnn_cell에 의해 지정된 반복적인 신경망을 만든다. # encoder_outputs(RNN 출력 Tensor)[batch_size, # max_time, cell.output_size] # encoder_states 최종 상태 [batch_size, cell.state_size] encoder_outputs, encoder_states = tf.nn.dynamic_rnn(cell=rnn_cell, # RNN 셀 inputs=embedding_encoder_batch, # 입력 값 dtype=tf.float32) # 타입 # 변수 재사용을 위해서 reuse=.AUTO_REUSE를 사용하며 범위를 정해주고 # 사용하기 위해 scope설정을 한다. # make_lstm_cell이 "cell"반복적으로 호출 되면서 재사용된다. with tf.variable_scope('decoder_scope', reuse=tf.AUTO_REUSE): # 값이 True이면 멀티레이어로 모델을 구성하고 False이면 단일레이어로 # 모델을 구성 한다. if params['multilayer'] == True: # layer_size 만큼 LSTMCell을 decoder_cell_list에 담는다. decoder_cell_list = [make_lstm_cell(mode, params['hidden_size'], i) for i in range(params['layer_size'])] # MUltiLayer RNN CEll에 decoder_cell_list를 넣어 멀티 레이어를 만든다. 
rnn_cell = tf.contrib.rnn.MultiRNNCell(decoder_cell_list, state_is_tuple=False) else: # 단층 LSTMLCell을 만든다. rnn_cell = make_lstm_cell(mode, params['hidden_size'], "") decoder_state = encoder_states # 매 타임 스텝에 나오는 아웃풋을 저장하는 리스트 두개를 만든다. # 하나는 토큰 인덱스는 predict_tokens 저장 # 다른 하나는 temp_logits에 logits 저장한다. predict_tokens = list() temp_logits = list() # 평가인 경우에는 teacher forcing이 되지 않도록 해야한다. # 따라서 학습이 아닌경우에 is_train을 False로 하여 teacher forcing이 되지 않도록 한다. output_token = tf.ones(shape=(tf.shape(encoder_outputs)[0],), dtype=tf.int32) * 1 # 전체 문장 길이 만큼 타임 스텝을 돌도록 한다. for i in range(DEFINES.max_sequence_length): # 두 번쨰 스텝 이후에는 teacher forcing을 적용하는지 확률에 따라 결정하도록 한다. # teacher forcing rate은 teacher forcing을 어느정도 줄 것인지를 조절한다. if TRAIN: if i > 0: # tf.cond를 통해 rnn에 입력할 입력 임베딩 벡터를 결정한다 여기서 true인 경우엔 입력된 output값 아닌경우에는 이전 스텝에 # 나온 output을 사용한다. input_token_emb = tf.cond( tf.logical_and( # 논리 and 연산자 True, tf.random_uniform(shape=(), maxval=1) <= params['teacher_forcing_rate'] # 률에 따른 labels값 지원 유무 ), lambda: tf.nn.embedding_lookup(embedding_encoder, labels[:, i-1]), # labels 정답을 넣어주고 있다. lambda: tf.nn.embedding_lookup(embedding_encoder, output_token) # 모델이 정답이라고 생각 하는 값 ) else: input_token_emb = tf.nn.embedding_lookup(embedding_encoder, output_token) # 모델이 정답이라고 생각 하는 값 else: # 평가 및 예측은 여기를 진행해야 한다. input_token_emb = tf.nn.embedding_lookup(embedding_encoder, output_token) # 어텐션 적용 부분 if params['attention'] == True: W1 = tf.keras.layers.Dense(params['hidden_size']) W2 = tf.keras.layers.Dense(params['hidden_size']) V = tf.keras.layers.Dense(1) # (?, 256) -> (?, 128) hidden_with_time_axis = W2(decoder_state) # (?, 128) -> (?, 1, 128) hidden_with_time_axis = tf.expand_dims(hidden_with_time_axis, axis=1) # (?, 1, 128) -> (?, 25, 128) hidden_with_time_axis = tf.manip.tile(hidden_with_time_axis, [1, DEFINES.max_sequence_length, 1]) # (?, 25, 1) score = V(tf.nn.tanh(W1(encoder_outputs) + hidden_with_time_axis)) # score = V(tf.nn.tanh(W1(encoderOutputs) + tf.manip.tile(tf.expand_dims(W2(decoder_state), axis=1), [1, DEFINES.maxSequenceLength, 1]))) # (?, 25, 1) attention_weights = tf.nn.softmax(score, axis=-1) # (?, 25, 128) context_vector = attention_weights * encoder_outputs # (?, 25, 128) -> (?, 128) context_vector = tf.reduce_sum(context_vector, axis=1) # (?, 256) input_token_emb = tf.concat([context_vector, input_token_emb], axis=-1) # RNNCell을 호출하여 RNN 스텝 연산을 진행하도록 한다. input_token_emb = tf.keras.layers.Dropout(0.5)(input_token_emb) decoder_outputs, decoder_state = rnn_cell(input_token_emb, decoder_state) decoder_outputs = tf.keras.layers.Dropout(0.5)(decoder_outputs) # feedforward를 거쳐 output에 대한 logit값을 구한다. output_logits = tf.layers.dense(decoder_outputs, params['vocabulary_length'], activation=None) # softmax를 통해 단어에 대한 예측 probability를 구한다. output_probs = tf.nn.softmax(output_logits) output_token = tf.argmax(output_probs, axis=-1) # 한 스텝에 나온 토큰과 logit 결과를 저장해둔다. predict_tokens.append(output_token) temp_logits.append(output_logits) # 저장했던 토큰과 logit 리스트를 stack을 통해 메트릭스로 만들어 준다. # 만들게 뙤면 차원이 [시퀀스 X 배치 X 단어 feature 수] 이렇게 되는데 # 이를 transpose하여 [배치 X 시퀀스 X 단어 feature 수] 로 맞춰준다. predict = tf.transpose(tf.stack(predict_tokens, axis=0), [1, 0]) logits = tf.transpose(tf.stack(temp_logits, axis=0), [1, 0, 2]) print(predict.shape) print(logits.shape) if PREDICT: if params['serving'] == True: export_outputs = { 'indexs': tf.estimator.export.PredictOutput(predict) # 서빙 결과값을 준다. } predictions = { # 예측 값들이 여기에 딕셔너리 형태로 담긴다. 
'indexs': predict, # predicted value for each sequence step 'logits': logits, # final logits } # The estimator returns a tf.estimator.EstimatorSpec # object. # mode : the mode the estimator runs in (tf.estimator.ModeKeys.PREDICT) # predictions : predicted values if params['serving'] == True: return tf.estimator.EstimatorSpec(mode, predictions=predictions, export_outputs=export_outputs) return tf.estimator.EstimatorSpec(mode, predictions=predictions) # Compare the final logits with the labels through the softmax # cross-entropy loss (tf.nn.softmax_cross_entropy_with_logits_v2) # to measure the error, then reduce it to a single scalar tensor. # Neutralize the loss on pad positions: non-pad positions are weighted 1 # and pad positions 0. # Reshape the labels to one-hot form [batch * max_sequence_length * vocabulary_length] # so they match the shape of logits. labels_ = tf.one_hot(labels, params['vocabulary_length']) if TRAIN and params['loss_mask'] == True: loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels_)) masks = features['length'] loss = loss * tf.cast(masks, tf.float32) loss = tf.reduce_mean(loss) else: loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels_)) # Measure accuracy by counting how often the labels and # predictions agree. accuracy = tf.metrics.accuracy(labels=labels, predictions=predict, name='accOp') # The accuracy is averaged over the whole set. metrics = {'accuracy': accuracy} tf.summary.scalar('accuracy', accuracy[1]) # Evaluation mode stops here and # returns. if EVAL: # The estimator returns a # tf.estimator.EstimatorSpec object. # mode : the mode the estimator runs in (tf.estimator.ModeKeys.EVAL) # loss : error value # eval_metric_ops : accuracy metric return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) # Python assert statement: the program stops if the condition is false. # Defensive check: execution should never reach this point unless # the mode is tf.estimator.ModeKeys.TRAIN. assert TRAIN # Use the Adam optimizer. optimizer = tf.train.AdamOptimizer(learning_rate=DEFINES.learning_rate) # Minimize the error with the optimizer. train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step()) # The estimator returns a tf.estimator.EstimatorSpec object. # mode : the mode the estimator runs in (tf.estimator.ModeKeys.TRAIN) # loss : error value # train_op : the training (gradient update) op return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
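# Minimal usage sketch for the Model function above, wired into tf.estimator.
# The hyperparameter values, model_dir and train_input_fn are illustrative
# assumptions; only the params keys come from the function itself.
import tensorflow as tf

def make_estimator(vocabulary_length, train_input_fn):
    # The params keys mirror the ones read inside Model().
    params = {
        'embedding': True,
        'embedding_size': 128,
        'vocabulary_length': vocabulary_length,
        'multilayer': True,
        'layer_size': 2,
        'hidden_size': 128,
        'teacher_forcing_rate': 0.7,
        'attention': True,
        'loss_mask': True,
        'serving': False,
    }
    estimator = tf.estimator.Estimator(model_fn=Model, model_dir='./model_ckpt', params=params)
    estimator.train(input_fn=train_input_fn, steps=1000)
    return estimator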
def dense_resample(im, flow_im, output_valid_mask, name='dense_resample'): """ Resample reward at particular locations. Args: im: ...xHxWxC matrix to sample from. flow_im: ...xHxWx2 matrix, samples the image using absolute offsets as given by the flow_im. """ with tf.name_scope(name): valid_mask = None x, y = tf.unstack(flow_im, axis=-1) x = tf.cast(tf.reshape(x, [-1]), tf.float32) y = tf.cast(tf.reshape(y, [-1]), tf.float32) # constants shape = tf.unstack(tf.shape(im)) channels = shape[-1] width = shape[-2] height = shape[-3] num_batch = tf.cast(tf.reduce_prod(tf.stack(shape[:-3])), 'int32') zero = tf.constant(0, dtype=tf.int32) # Round up and down. x0 = tf.cast(tf.floor(x), 'int32'); x1 = x0 + 1; y0 = tf.cast(tf.floor(y), 'int32'); y1 = y0 + 1; if output_valid_mask: valid_mask = tf.logical_and( tf.logical_and(tf.less_equal(x, tf.cast(width, tf.float32) - 1.), tf.greater_equal(x, 0.)), tf.logical_and(tf.less_equal(y, tf.cast(height, tf.float32) - 1.), tf.greater_equal(y, 0.))) valid_mask = tf.reshape(valid_mask, shape=shape[:-1] + [1]) x0 = tf.clip_by_value(x0, zero, width - 1) x1 = tf.clip_by_value(x1, zero, width - 1) y0 = tf.clip_by_value(y0, zero, height - 1) y1 = tf.clip_by_value(y1, zero, height - 1) dim2 = width; dim1 = width * height; # Create base index base = tf.reshape(tf.range(num_batch) * dim1, shape=[-1, 1]) base = tf.reshape(tf.tile(base, [1, height * width]), shape=[-1]) base_y0 = base + y0 * dim2 base_y1 = base + y1 * dim2 idx_a = base_y0 + x0 idx_b = base_y1 + x0 idx_c = base_y0 + x1 idx_d = base_y1 + x1 # use indices to lookup pixels in the flat image and restore channels dim sh = tf.stack([tf.constant(-1, dtype=tf.int32), channels]) im_flat = tf.cast(tf.reshape(im, sh), dtype=tf.float32) pixel_a = tf.gather(im_flat, idx_a) pixel_b = tf.gather(im_flat, idx_b) pixel_c = tf.gather(im_flat, idx_c) pixel_d = tf.gather(im_flat, idx_d) # and finally calculate interpolated values x1_f = tf.to_float(x1) y1_f = tf.to_float(y1) wa = tf.expand_dims(((x1_f - x) * (y1_f - y)), 1) wb = tf.expand_dims((x1_f - x) * (1.0 - (y1_f - y)), 1) wc = tf.expand_dims(((1.0 - (x1_f - x)) * (y1_f - y)), 1) wd = tf.expand_dims(((1.0 - (x1_f - x)) * (1.0 - (y1_f - y))), 1) output = tf.add_n([wa * pixel_a, wb * pixel_b, wc * pixel_c, wd * pixel_d]) output = tf.reshape(output, shape=tf.shape(im)) return output, valid_mask
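# Sanity-check sketch for dense_resample: with an identity flow (each pixel's own
# absolute (x, y) coordinates) the output should reproduce the input image, and the
# valid mask should be all True. The shapes and random input are assumptions.
import tensorflow as tf

batch, height, width, channels = 2, 4, 5, 3
im = tf.random_uniform([batch, height, width, channels])
# x indexes the width axis and y the height axis, matching the unstack order above.
xx, yy = tf.meshgrid(tf.range(width), tf.range(height))
identity_flow = tf.cast(tf.stack([xx, yy], axis=-1), tf.float32)       # HxWx2
identity_flow = tf.tile(identity_flow[tf.newaxis], [batch, 1, 1, 1])   # BxHxWx2
resampled, mask = dense_resample(im, identity_flow, output_valid_mask=True)
# resampled should equal im up to float rounding; mask marks in-bounds samples.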
def _random_crop(image_list, crop_height, crop_width): """Crops the given list of images. The function applies the same crop to each image in the list. This can be effectively applied when there are multiple image inputs of the same dimension such as: image, depths, normals = _random_crop([image, depths, normals], 120, 150) Args: image_list: a list of image tensors of the same dimension but possibly varying channel. crop_height: the new height. crop_width: the new width. Returns: the image_list with cropped images. Raises: ValueError: if there are multiple image inputs provided with different size or the images are smaller than the crop dimensions. """ if not image_list: raise ValueError('Empty image_list.') # Compute the rank assertions. rank_assertions = [] for i in range(len(image_list)): image_rank = tf.rank(image_list[i]) rank_assert = tf.Assert(tf.equal(image_rank, 3), [ 'Wrong rank for tensor %s [expected] [actual]', image_list[i].name, 3, image_rank ]) rank_assertions.append(rank_assert) image_shape = control_flow_ops.with_dependencies([rank_assertions[0]], tf.shape(image_list[0])) image_height = image_shape[0] image_width = image_shape[1] crop_size_assert = tf.Assert( tf.logical_and(tf.greater_equal(image_height, crop_height), tf.greater_equal(image_width, crop_width)), ['Crop size greater than the image size.']) asserts = [rank_assertions[0], crop_size_assert] for i in range(1, len(image_list)): image = image_list[i] asserts.append(rank_assertions[i]) shape = control_flow_ops.with_dependencies([rank_assertions[i]], tf.shape(image)) height = shape[0] width = shape[1] height_assert = tf.Assert(tf.equal(height, image_height), [ 'Wrong height for tensor %s [expected][actual]', image.name, height, image_height ]) width_assert = tf.Assert(tf.equal(width, image_width), [ 'Wrong width for tensor %s [expected][actual]', image.name, width, image_width ]) asserts.extend([height_assert, width_assert]) # Create a random bounding box. # # Use tf.random_uniform and not numpy.random.rand as doing the former would # generate random numbers at graph eval time, unlike the latter which # generates random numbers at graph definition time. max_offset_height = control_flow_ops.with_dependencies( asserts, tf.reshape(image_height - crop_height + 1, [])) max_offset_width = control_flow_ops.with_dependencies( asserts, tf.reshape(image_width - crop_width + 1, [])) offset_height = tf.random_uniform([], maxval=max_offset_height, dtype=tf.int32) offset_width = tf.random_uniform([], maxval=max_offset_width, dtype=tf.int32) return [ _crop(image, offset_height, offset_width, crop_height, crop_width) for image in image_list ]
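# Usage sketch for _random_crop: cropping an image and its label map together so
# that both receive the same random offset. The shapes below are assumptions, and
# _crop is the companion helper already referenced inside _random_crop.
import tensorflow as tf

image = tf.random_uniform([480, 640, 3])
label = tf.random_uniform([480, 640, 1], maxval=21, dtype=tf.int32)
cropped_image, cropped_label = _random_crop([image, label], crop_height=224, crop_width=224)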
def model_fn(mode, inputs, params, reuse=False): """Model function defining the graph operations. Args: mode: (string) can be 'train' or 'eval' inputs: (dict) contains the inputs of the graph (features, labels...) this can be `tf.placeholder` or outputs of `tf.data` params: (Params) contains hyperparameters of the model (ex: `params.learning_rate`) reuse: (bool) whether to reuse the weights Returns: model_spec: (dict) contains the graph operations or nodes needed for training / evaluation """ is_training = (mode == 'train') labels = inputs['labels'] labels = tf.cast(labels, tf.int64) # ----------------------------------------------------------- # MODEL: define the layers of the model with tf.variable_scope('model', reuse=reuse): # Compute the output distribution of the model and the predictions logits = build_model(is_training, inputs, params) predictions = tf.argmax(logits, 1) # Define loss and accuracy loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) accuracy = tf.reduce_mean( tf.cast(tf.equal(labels, predictions), tf.float32)) # Define training step that minimizes the loss with the Adam optimizer if is_training: optimizer = tf.train.AdamOptimizer(params.learning_rate) global_step = tf.train.get_or_create_global_step() if params.use_batch_norm: # Add a dependency to update the moving mean and variance for batch normalization with tf.control_dependencies( tf.get_collection(tf.GraphKeys.UPDATE_OPS)): train_op = optimizer.minimize(loss, global_step=global_step) else: train_op = optimizer.minimize(loss, global_step=global_step) # ----------------------------------------------------------- # METRICS AND SUMMARIES # Metrics for evaluation using tf.metrics (average over whole dataset) with tf.variable_scope("metrics"): metrics = { 'accuracy': tf.metrics.accuracy(labels=labels, predictions=tf.argmax(logits, 1)), 'loss': tf.metrics.mean(loss) } # Group the update ops for the tf.metrics update_metrics_op = tf.group(*[op for _, op in metrics.values()]) # Get the op to reset the local variables used in tf.metrics metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="metrics") metrics_init_op = tf.variables_initializer(metric_variables) # Summaries for training tf.summary.scalar('loss', loss) tf.summary.scalar('accuracy', accuracy) tf.summary.image('train_image', inputs['images']) # Add incorrectly labeled images mask = tf.not_equal(labels, predictions) # Add a different summary to know how they were misclassified for label in range(0, params.num_labels): mask_label = tf.logical_and(mask, tf.equal(predictions, label)) incorrect_image_label = tf.boolean_mask(inputs['images'], mask_label) tf.summary.image('incorrectly_labeled_{}'.format(label), incorrect_image_label) # ----------------------------------------------------------- # MODEL SPECIFICATION # Create the model specification and return it # It contains nodes or operations in the graph that will be used for training and evaluation model_spec = inputs model_spec['variable_init_op'] = tf.global_variables_initializer() model_spec["predictions"] = predictions model_spec['loss'] = loss model_spec['accuracy'] = accuracy model_spec['metrics_init_op'] = metrics_init_op model_spec['metrics'] = metrics model_spec['update_metrics'] = update_metrics_op model_spec['summary_op'] = tf.summary.merge_all() if is_training: model_spec['train_op'] = train_op return model_spec
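# Sketch of driving the model_spec returned above through one training epoch.
# The iterator_init_op key and num_steps are assumptions made for illustration;
# the other keys are exactly those stored in model_spec by model_fn.
import tensorflow as tf

def train_one_epoch(model_spec, num_steps):
    with tf.Session() as sess:
        sess.run(model_spec['variable_init_op'])
        sess.run(model_spec['iterator_init_op'])   # assumed to exist when inputs come from tf.data
        sess.run(model_spec['metrics_init_op'])    # reset the tf.metrics accumulators
        for _ in range(num_steps):
            _, _, loss_val = sess.run([model_spec['train_op'],
                                       model_spec['update_metrics'],
                                       model_spec['loss']])
        # Read back the metrics averaged over the epoch.
        metrics_values = {k: v[0] for k, v in model_spec['metrics'].items()}
        return sess.run(metrics_values), loss_val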
def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits): """ Args: anchor_labels: fHxfWxNA anchor_boxes: fHxfWxNAx4, encoded, these are the gt boxes that match the corresponding anchors label_logits: fHxfWxNA box_logits: fHxfWxNAx4 Returns: label_loss, box_loss """ with tf.device('/cpu:0'): # -1 marks the irrelevant anchors whose IoU is between 0.3 and 0.7 valid_mask = tf.stop_gradient(tf.not_equal(anchor_labels, -1)) pos_mask = tf.stop_gradient(tf.equal(anchor_labels, 1)) # counting should be placed on cpu? nr_valid = tf.stop_gradient(tf.count_nonzero(valid_mask, dtype=tf.int32), name='num_valid_anchor') nr_pos = tf.identity(tf.count_nonzero(pos_mask, dtype=tf.int32), name='num_pos_anchor') # nr_pos is guaranteed >0 in C4. But in FPN, even nr_valid could be 0. valid_anchor_labels = tf.boolean_mask(anchor_labels, valid_mask) valid_label_logits = tf.boolean_mask(label_logits, valid_mask) with tf.name_scope('label_metrics'): valid_label_prob = tf.nn.sigmoid(valid_label_logits) summaries = [] with tf.device('/cpu:0'): for th in [0.5, 0.2, 0.1]: valid_prediction = tf.cast(valid_label_prob > th, tf.int32) nr_pos_prediction = tf.reduce_sum(valid_prediction, name='num_pos_prediction') pos_prediction_corr = tf.count_nonzero(tf.logical_and( valid_label_prob > th, tf.equal(valid_prediction, valid_anchor_labels)), dtype=tf.int32) placeholder = 0.5 # A small value will make summaries appear lower. recall = tf.to_float(tf.truediv(pos_prediction_corr, nr_pos)) # in case an invalid number is printed recall = tf.where(tf.equal(nr_pos, 0), placeholder, recall, name='recall_th{}'.format(th)) precision = tf.to_float( tf.truediv(pos_prediction_corr, nr_pos_prediction)) precision = tf.where(tf.equal(nr_pos_prediction, 0), placeholder, precision, name='precision_th{}'.format(th)) summaries.extend([precision, recall]) add_moving_summary(*summaries) # Per-level loss summaries in FPN may appear lower due to the use of a small placeholder. # But the total RPN loss will be fine. TODO make the summary op smarter placeholder = 0. label_loss = tf.nn.sigmoid_cross_entropy_with_logits( labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits) label_loss = tf.reduce_sum(label_loss) * (1. / cfg.RPN.BATCH_PER_IM) label_loss = tf.where(tf.equal(nr_valid, 0), placeholder, label_loss, name='label_loss') pos_anchor_boxes = tf.boolean_mask(anchor_boxes, pos_mask) pos_box_logits = tf.boolean_mask(box_logits, pos_mask) delta = 1.0 / 9 box_loss = tf.losses.huber_loss(pos_anchor_boxes, pos_box_logits, delta=delta, reduction=tf.losses.Reduction.SUM) / delta box_loss = box_loss * (1. / cfg.RPN.BATCH_PER_IM) box_loss = tf.where(tf.equal(nr_pos, 0), placeholder, box_loss, name='box_loss') add_moving_summary(label_loss, box_loss, nr_valid, nr_pos) return label_loss, box_loss
def call(self, net, training): keep_prob = self.keep_prob dropblock_size = self.dropblock_size data_format = self.data_format if not training or keep_prob is None: return net logging.info('Applying DropBlock: dropblock_size %s, net.shape %s', dropblock_size, net.shape) if data_format == 'channels_last': _, width, height, _ = net.get_shape().as_list() else: _, _, width, height = net.get_shape().as_list() if width != height: raise ValueError('Input tensor with width!=height is not supported.') dropblock_size = min(dropblock_size, width) # seed_drop_rate is the gamma parameter of DropBlock. seed_drop_rate = (1.0 - keep_prob) * width**2 / dropblock_size**2 / ( width - dropblock_size + 1)**2 # Forces the block to be inside the feature map. w_i, h_i = tf.meshgrid(tf.range(width), tf.range(width)) valid_block_center = tf.logical_and( tf.logical_and(w_i >= int(dropblock_size // 2), w_i < width - (dropblock_size - 1) // 2), tf.logical_and(h_i >= int(dropblock_size // 2), h_i < width - (dropblock_size - 1) // 2)) valid_block_center = tf.expand_dims(valid_block_center, 0) valid_block_center = tf.expand_dims( valid_block_center, -1 if data_format == 'channels_last' else 0) randnoise = tf.random_uniform(net.shape, dtype=tf.float32) block_pattern = ( 1 - tf.cast(valid_block_center, dtype=tf.float32) + tf.cast( (1 - seed_drop_rate), dtype=tf.float32) + randnoise) >= 1 block_pattern = tf.cast(block_pattern, dtype=tf.float32) if dropblock_size == width: block_pattern = tf.reduce_min( block_pattern, axis=[1, 2] if data_format == 'channels_last' else [2, 3], keepdims=True) else: if data_format == 'channels_last': ksize = [1, dropblock_size, dropblock_size, 1] else: ksize = [1, 1, dropblock_size, dropblock_size] block_pattern = -tf.nn.max_pool( -block_pattern, ksize=ksize, strides=[1, 1, 1, 1], padding='SAME', data_format='NHWC' if data_format == 'channels_last' else 'NCHW') percent_ones = ( tf.cast(tf.reduce_sum((block_pattern)), tf.float32) / tf.cast(tf.size(block_pattern), tf.float32)) net = net / tf.cast(percent_ones, net.dtype) * tf.cast( block_pattern, net.dtype) return net
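# Usage sketch for the DropBlock call method above, applied to a square feature map.
# The constructor name and arguments (DropBlock, keep_prob, dropblock_size, data_format)
# are assumptions about the surrounding class, which is not shown here.
import tensorflow as tf

features = tf.random_uniform([8, 28, 28, 64])         # NHWC, width == height as required
dropblock = DropBlock(keep_prob=0.9, dropblock_size=7, data_format='channels_last')
train_out = dropblock.call(features, training=True)   # blocks are dropped and the output rescaled
eval_out = dropblock.call(features, training=False)   # identity at evaluation time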
def __init__(self, args, source_chars, target_chars, bow, eow, threads, seed=42): # Create an empty graph and a session graph = tf.Graph() graph.seed = seed self.session = tf.Session(graph=graph, config=tf.ConfigProto( inter_op_parallelism_threads=threads, intra_op_parallelism_threads=threads)) with self.session.graph.as_default(): # Inputs self.sentence_lens = tf.placeholder(tf.int32, [None], name="sentence_lens") self.source_ids = tf.placeholder(tf.int32, [None, None], name="source_ids") self.source_seqs = tf.placeholder(tf.int32, [None, None], name="source_seqs") self.source_seq_lens = tf.placeholder(tf.int32, [None], name="source_seq_lens") self.target_ids = tf.placeholder(tf.int32, [None, None], name="target_ids") self.target_seqs = tf.placeholder(tf.int32, [None, None], name="target_seqs") self.target_seq_lens = tf.placeholder(tf.int32, [None], name="target_seq_lens") self.learning_rate = tf.placeholder_with_default(0.01, None) # Training. The rest of the code assumes that # - when training the decoder, the output layer with logits for each generated # character is in `output_layer` and the corresponding predictions are in # `self.predictions_training`. # - the `target_ids` contains the gold generated characters # - the `target_lens` contains number of valid characters for each lemma # - when running decoder inference, the predictions are in `self.predictions` # and their lengths in `self.prediction_lens`. output_layer, self.predictions_training, target_ids, target_lens, self.predictions, self.prediction_lens = \ self.build_model(args, source_chars, target_chars, bow, eow) # Training weights = tf.sequence_mask(target_lens, dtype=tf.float32) loss = tf.losses.sparse_softmax_cross_entropy(target_ids, output_layer, weights=weights) global_step = tf.train.create_global_step() self.training = tf.train.AdamOptimizer( learning_rate=self.learning_rate).minimize( loss, global_step=global_step, name="training") # Summaries accuracy_training = tf.reduce_all(tf.logical_or( tf.equal(self.predictions_training, target_ids), tf.logical_not(tf.sequence_mask(target_lens))), axis=1) self.current_accuracy_training, self.update_accuracy_training = tf.metrics.mean( accuracy_training) minimum_length = tf.minimum( tf.shape(self.predictions)[1], tf.shape(target_ids)[1]) accuracy = tf.logical_and( tf.equal(self.prediction_lens, target_lens), tf.reduce_all(tf.logical_or( tf.equal(self.predictions[:, :minimum_length], target_ids[:, :minimum_length]), tf.logical_not( tf.sequence_mask(target_lens, maxlen=minimum_length))), axis=1)) self.current_accuracy, self.update_accuracy = tf.metrics.mean( accuracy) self.current_loss, self.update_loss = tf.metrics.mean( loss, weights=tf.reduce_sum(weights)) self.reset_metrics = tf.variables_initializer( tf.get_collection(tf.GraphKeys.METRIC_VARIABLES)) summary_writer = tf.contrib.summary.create_file_writer( args.logdir, flush_millis=10 * 1000) self.summaries = {} with summary_writer.as_default( ), tf.contrib.summary.record_summaries_every_n_global_steps(10): self.summaries["train"] = [ tf.contrib.summary.scalar("train/loss", self.update_loss), tf.contrib.summary.scalar("train/lr", self.learning_rate), tf.contrib.summary.scalar("train/accuracy", self.update_accuracy_training) ] with summary_writer.as_default( ), tf.contrib.summary.always_record_summaries(): for dataset in ["dev", "test"]: self.summaries[dataset] = [ tf.contrib.summary.scalar(dataset + "/loss", self.current_loss), tf.contrib.summary.scalar(dataset + "/accuracy", self.current_accuracy) ] # Initialize variables 
self.session.run(tf.global_variables_initializer()) with summary_writer.as_default(): tf.contrib.summary.initialize(session=self.session, graph=self.session.graph) self.saver = tf.train.Saver()
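# Sketch of feeding training batches into the graph built by the constructor above.
# The network variable, the batch dict and its keys are assumptions for illustration;
# the placeholder attributes are the ones declared in __init__.
def train_epoch(network, batches, learning_rate=0.01):
    network.session.run(network.reset_metrics)
    for batch in batches:
        # batch is assumed to be a dict of numpy arrays keyed like the placeholders.
        network.session.run([network.training, network.summaries["train"]],
                            feed_dict={network.sentence_lens: batch["sentence_lens"],
                                       network.source_ids: batch["source_ids"],
                                       network.source_seqs: batch["source_seqs"],
                                       network.source_seq_lens: batch["source_seq_lens"],
                                       network.target_ids: batch["target_ids"],
                                       network.target_seqs: batch["target_seqs"],
                                       network.target_seq_lens: batch["target_seq_lens"],
                                       network.learning_rate: learning_rate})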
def _local_perm(inputs, targets, is_masked, perm_size, seq_len): """Samples a permutation of the factorization order. Creates perm_mask and target_mask accordingly. Args: inputs: int64 Tensor in shape [seq_len], input ids. targets: int64 Tensor in shape [seq_len], target ids. is_masked: bool Tensor in shape [seq_len]. True means being selected for partial prediction. perm_size: the length of longest permutation. Could be set to be reuse_len. Should not be larger than reuse_len or there will be data leaks. seq_len: int, sequence length. Returns: perm_mask: float32 Tensor in shape [seq_len, seq_len] consisting of 0 and 1. If perm_mask[i][j] == 1, it means the ith token (in original order) cannot attend to the jth token (in original order). This case will happen only when the ith token's permuted position <= the jth token's permuted position, and the jth token is masked or is a func token. If perm_mask[i][j] == 0, it means the ith token (in original order) can attend to the jth token (in original order). Note that non-masked tokens can be attended by all other tokens, which is different from the description in the original paper. new_targets: int64 Tensor in shape [seq_len], target token ids to be predicted in XLNet. In XLNet, target doesn't need to be shifted one position. target_mask: float32 Tensor in shape [seq_len] consisting of 0 and 1. If target_mask[i] == 1, the ith token needs to be predicted and the mask will be used as input. This token will count for loss. If target_mask[i] == 0, the token (or [SEP], [CLS]) will be used as input. This token will not count for loss. inputs_k: int64 Tensor in shape [seq_len], input ids. inputs_q: float32 Tensor in shape [seq_len], the same as target_mask. """ # Generate permutation indices index = tf.range(seq_len, dtype=tf.int64) index = tf.transpose(tf.reshape(index, [-1, perm_size])) index = tf.random.shuffle(index) index = tf.reshape(tf.transpose(index), [-1]) # `perm_mask` and `target_mask` # non-functional tokens non_func_tokens = tf.logical_not( tf.logical_or(tf.equal(inputs, SEP_ID), tf.equal(inputs, CLS_ID))) non_mask_tokens = tf.logical_and(tf.logical_not(is_masked), non_func_tokens) masked_or_func_tokens = tf.logical_not(non_mask_tokens) # Set the permutation indices of non-masked (& non-functional) tokens to the # smallest index (-1): # (1) they can be seen by all other positions # (2) they cannot see masked positions, so there won't be information leak smallest_index = -tf.ones([seq_len], dtype=tf.int64) rev_index = tf.where(non_mask_tokens, smallest_index, index) # Create `target_mask`: non-functional and masked tokens # 1: use mask as input and have loss # 0: use token (or [SEP], [CLS]) as input and do not have loss target_tokens = tf.logical_and(masked_or_func_tokens, non_func_tokens) target_mask = tf.cast(target_tokens, tf.float32) # Create `perm_mask` # `target_tokens` cannot see themselves self_rev_index = tf.where(target_tokens, rev_index, rev_index + 1) # 1: cannot attend if i <= j and j is not non-masked (masked_or_func_tokens) # 0: can attend if i > j or j is non-masked perm_mask = tf.logical_and(self_rev_index[:, None] <= rev_index[None, :], masked_or_func_tokens) perm_mask = tf.cast(perm_mask, tf.float32) # new target: [next token] for LM and [curr token] (self) for PLM new_targets = tf.concat([inputs[0:1], targets[:-1]], axis=0) # construct inputs_k inputs_k = inputs # construct inputs_q inputs_q = target_mask return perm_mask, new_targets, target_mask, inputs_k, inputs_q
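# Usage sketch for _local_perm: inspecting perm_mask for a toy sequence. The token
# ids and mask pattern below are assumptions; SEP_ID and CLS_ID are the module-level
# constants that _local_perm already relies on, and perm_size must divide seq_len.
import tensorflow as tf

seq_len, perm_size = 8, 4
inputs = tf.constant([CLS_ID, 11, 12, 13, SEP_ID, 14, 15, 16], dtype=tf.int64)
targets = tf.constant([11, 12, 13, SEP_ID, 14, 15, 16, SEP_ID], dtype=tf.int64)
is_masked = tf.constant([False, False, True, True, False, False, True, False])
perm_mask, new_targets, target_mask, inputs_k, inputs_q = _local_perm(
    inputs, targets, is_masked, perm_size, seq_len)
with tf.Session() as sess:
    print(sess.run(perm_mask))   # [seq_len, seq_len] matrix of 0/1 attention constraints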
def train(self, batch_size, learning_rate=1e-4, out_help=False, time_discount=0.4, sampling_probability=0.2): """Build model for training. Args: batch_size: size of training batch """ self.input_data = tf.placeholder(tf.int32, [batch_size, None], name='input_data') self.input_lengths = tf.placeholder(tf.int32, [batch_size], name='input_lengths') self.output_data = tf.placeholder(tf.int32, [batch_size, None], name='output_data') self.output_lengths = tf.placeholder(tf.int32, [batch_size], name='output_lengths') output_data_maxlen = tf.shape(self.output_data)[1] def infer_helper(): return seq2seq.GreedyEmbeddingHelper( self._output_onehot, start_tokens=tf.fill([batch_size], self._output_sos_id), end_token=self._output_eos_id) def train_helper(): start_ids = tf.fill([batch_size, 1], self._output_sos_id) decoder_input_ids = tf.concat([start_ids, self.output_data], 1) decoder_inputs = self._output_onehot(decoder_input_ids) return seq2seq.ScheduledEmbeddingTrainingHelper( decoder_inputs, self.output_lengths, self._output_onehot, sampling_probability) helper = train_helper if out_help else infer_helper self._build_model(batch_size, helper, decoder_maxiters=output_data_maxlen) output_maxlen = tf.minimum( tf.shape(self.outputs)[1], output_data_maxlen) out_data_slice = tf.slice(self.output_data, [0, 0], [-1, output_maxlen]) out_logits_slice = tf.slice(self.outputs, [0, 0, 0], [-1, output_maxlen, -1]) out_pred_slice = tf.slice(self.output_ids, [0, 0], [-1, output_maxlen]) with tf.name_scope("costs"): losses = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=out_logits_slice, labels=out_data_slice) length_mask = tf.sequence_mask(self.output_lengths, maxlen=output_maxlen, dtype=self._dtype) losses = losses * length_mask # out_id = 2,3,4,5,6 : AA,AE,AH,AO,AW : reduce the cost by 20% for a-confusion data_is_a = tf.logical_and(tf.greater_equal(out_data_slice, 2), tf.less_equal(out_data_slice, 6)) pred_is_a = tf.logical_and(tf.greater_equal(out_pred_slice, 2), tf.less_equal(out_pred_slice, 6)) a_mask = tf.cast(tf.logical_and(data_is_a, pred_is_a), dtype=tf.float32) losses = losses * (1.0 - 0.2 * a_mask) if time_discount > 0: # time discounts (only when using infer helper?) factors = tf.pow( tf.range(1, tf.to_float(output_maxlen + 1), dtype=tf.float32), -time_discount) losses = losses * tf.expand_dims(factors, 0) losses = tf.reduce_sum(losses, 1) self.losses = tf.reduce_sum(losses) tf.summary.scalar('losses', self.losses) inequality = tf.cast(tf.not_equal(self.output_ids, out_data_slice), dtype=tf.float32) # reduce inequality inaccuracy by 20% for a-confusion inequality = inequality * (1.0 - 0.1 * a_mask) self.accuracy = tf.reduce_mean(1.0 - inequality) tf.summary.scalar('accuracy', tf.reduce_sum(self.accuracy)) self.global_step = tf.Variable(0, trainable=False, name="global_step") decay_rate = tf.constant(0.8, dtype=tf.float64) self.learning_rate = learning_rate * tf.pow( decay_rate, tf.floor(self.global_step / 4000)) opt = tf.train.AdamOptimizer(self.learning_rate) self.train_step = opt.minimize(losses, global_step=self.global_step)
def continue_optimization(t, mean, var, best_val, best_sol, elites, returns): return tf.logical_and(tf.less(t, self.max_iters), tf.reduce_max(var) > self.epsilon)
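# A condition like continue_optimization above is meant to be the `cond` of a
# tf.while_loop inside a CEM-style optimizer. The pairing might look like the method
# below; the attribute names (sol_dim, popsize, num_elites), the initial values, and
# the stub iteration body are assumptions about the surrounding class, not its code.
import tensorflow as tf

def build_cem_loop(self, init_mean, init_var):
    def continue_optimization(t, mean, var, best_val, best_sol, elites, returns):
        # repeated from above for completeness
        return tf.logical_and(tf.less(t, self.max_iters), tf.reduce_max(var) > self.epsilon)

    def iteration(t, mean, var, best_val, best_sol, elites, returns):
        # Sample candidates around (mean, var), evaluate them, keep the elites and
        # refit mean/var here; this stub only advances the loop counter.
        return t + 1, mean, var, best_val, best_sol, elites, returns

    loop_vars = [tf.constant(0), init_mean, init_var,
                 tf.constant(-1e10), init_mean,
                 tf.zeros([self.num_elites, self.sol_dim]),
                 tf.zeros([self.popsize])]
    return tf.while_loop(continue_optimization, iteration, loop_vars)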