def getRpRnTpTnForTrain0OrVal1(self, y, training0OrValidation1):
    """Build the per-class counting tensors used for confusion statistics.

    For every class index in ``range(self._numberOfOutputClasses)`` four
    scalar int32 tensors are appended to the result, in this order:
    number of real positives, number of real negatives, number of true
    predicted positives, number of true predicted negatives.  Classes appear
    in their natural order (class 0 first).

    Args:
        y: ground-truth label tensor.  The original comment describes it as
            an integer tensor of dimensions [batchSize, r, c, z].
        training0OrValidation1: ``0`` compares against ``self.y_pred_train``;
            any other value compares against ``self.y_pred_val``.

    Returns:
        A flat Python list holding ``numberOfClasses * 4`` scalar tensors.
    """
    if training0OrValidation1 == 0:
        predictions = self.y_pred_train
    else:
        predictions = self.y_pred_val

    counts = []
    for class_index in range(self._numberOfOutputClasses):
        # Boolean indicator maps for this class.
        is_real_pos = tf.equal(y, class_index)
        is_real_neg = tf.logical_not(is_real_pos)
        is_pred_pos = tf.equal(predictions, class_index)
        is_pred_neg = tf.logical_not(is_pred_pos)
        is_true_pos = tf.logical_and(is_real_pos, is_pred_pos)
        is_true_neg = tf.logical_and(is_real_neg, is_pred_neg)

        # Order matters: RP, RN, TP, TN.
        counts.extend(
            tf.reduce_sum(tf.cast(indicator, dtype="int32"))
            for indicator in (is_real_pos, is_real_neg,
                              is_true_pos, is_true_neg)
        )
    return counts
def m_body(i, ta_tp, ta_fp, gmatch, n_ignored_det):
    """Classify detection ``i`` as true/false positive against the groundtruth.

    Reads ``bboxes``, ``gxs``, ``gys``, ``gignored``, ``grange`` and
    ``matching_threshold`` from the enclosing scope.  Presumably used as the
    body of a ``tf.while_loop`` (it returns the updated loop variables) --
    confirm against the caller.

    Args:
        i: index of the detection being processed.
        ta_tp: array-like with ``write(index, value)`` collecting TP flags.
        ta_fp: array-like with ``write(index, value)`` collecting FP flags.
        gmatch: boolean tensor marking groundtruth entries already matched.
        n_ignored_det: running count of detections whose best groundtruth
            is flagged in ``gignored``.

    Returns:
        ``[i + 1, ta_tp, ta_fp, gmatch, n_ignored_det]`` with updated values.
    """
    # Overlap of the i-th detection with the groundtruth.
    overlaps = bboxes_jaccard(bboxes[i, :], gxs, gys)

    # Best-fitting groundtruth entry and whether it clears the threshold.
    best_idx = tf.cast(tf.argmax(overlaps, axis=0), dtype=tf.int32)
    is_match = overlaps[best_idx] > matching_threshold
    already_matched = gmatch[best_idx]
    keep = tf.logical_not(gignored[best_idx])
    n_ignored_det = n_ignored_det + tf.cast(gignored[best_idx], tf.int32)

    # TP: match with a groundtruth that had no previous match.
    # FP: groundtruth already matched, or no match at all.
    # Ignored groundtruth records neither (both flags False).
    is_new_match = tf.logical_and(is_match, tf.logical_not(already_matched))
    ta_tp = ta_tp.write(i, tf.logical_and(keep, is_new_match))

    is_miss = tf.logical_or(already_matched, tf.logical_not(is_match))
    ta_fp = ta_fp.write(i, tf.logical_and(keep, is_miss))

    # Mark the matched groundtruth entry as used.
    newly_used = tf.logical_and(tf.equal(grange, best_idx),
                                tf.logical_and(keep, is_match))
    gmatch = tf.logical_or(gmatch, newly_used)

    return [i + 1, ta_tp, ta_fp, gmatch, n_ignored_det]
def train(self, sentences):
    """Run one next-word-prediction training step on ``sentences``.

    The sentences are tokenized via ``self._tokenize``, converted to ids via
    ``self._words_to_indices``, and fed through the embedding, RNN and logit
    layers.  The loss is the masked mean of the per-token sparse softmax
    cross entropy; every variable watched by the gradient tape is then
    updated in place with ``assign_sub(gradient)``.

    Args:
        sentences: input accepted by ``self._tokenize``.

    Returns:
        The scalar loss tensor computed before the update.
    """
    token_ids, token_values, token_dense_shape = self._tokenize(sentences)
    sparse_tokens = tf.sparse.SparseTensor(
        indices=token_ids,
        values=token_values,
        dense_shape=token_dense_shape)
    dense_tokens = tf.sparse.to_dense(sparse_tokens, default_value="")

    sparse_ids = tf.sparse.SparseTensor(
        indices=sparse_tokens.indices,
        values=self._words_to_indices(sparse_tokens.values),
        dense_shape=sparse_tokens.dense_shape)
    dense_ids = tf.sparse.to_dense(sparse_ids, default_value=0)

    # Each position predicts the token that follows it.
    input_ids = dense_ids[:, 0:-1]
    target_ids = dense_ids[:, 1:]
    input_tokens = dense_tokens[:, 0:-1]

    # Only positions holding a real, non-terminal token contribute to the
    # loss: padding ("") and the end marker ("<E>") are masked out.
    not_padding = tf.logical_not(tf.equal(input_tokens, ""))
    not_end = tf.logical_not(tf.equal(input_tokens, "<E>"))
    input_mask = tf.cast(tf.logical_and(not_padding, not_end), tf.int32)

    with tf.GradientTape() as tape:
        embedded = tf.nn.embedding_lookup(self._embeddings, input_ids)
        initial_state = self._lstm_cell.get_initial_state(embedded)
        rnn_output = self._rnn_layer(
            inputs=embedded, initial_state=initial_state)

        # Flatten so that every token position becomes one row.
        rnn_output = tf.reshape(
            rnn_output, [-1, self._lstm_cell.output_size])
        logits = self._logit_layer(rnn_output)

        flat_targets = tf.reshape(target_ids, [-1])
        flat_weights = tf.cast(tf.reshape(input_mask, [-1]), tf.float32)

        token_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=flat_targets, logits=logits)

        # Mean loss over the unmasked token positions.
        final_loss = tf.math.divide(
            tf.reduce_sum(tf.multiply(token_losses, flat_weights)),
            tf.reduce_sum(flat_weights),
            name="final_loss")

    variables = tape.watched_variables()
    gradients = tape.gradient(final_loss, variables)

    # Plain gradient step: the raw gradient is subtracted from each variable.
    for variable, gradient in zip(variables, gradients):
        variable.assign_sub(gradient)

    return final_loss
def m_body(i, ta_tp, ta_fp, gmatch):
    """Classify detection ``i`` as true/false positive against the groundtruth.

    Reads ``bboxes``, ``gbboxes``, ``glabels``, ``rlabel``, ``gdifficults``,
    ``grange`` and ``matching_threshold`` from the enclosing scope.
    Presumably used as the body of a ``tf.while_loop`` (it returns the
    updated loop variables) -- confirm against the caller.

    Args:
        i: index of the detection being processed.
        ta_tp: array-like with ``write(index, value)`` collecting TP flags.
        ta_fp: array-like with ``write(index, value)`` collecting FP flags.
        gmatch: boolean tensor marking groundtruth entries already matched.

    Returns:
        ``[i + 1, ta_tp, ta_fp, gmatch]`` with updated values.
    """
    # Overlap with every groundtruth box; overlaps with groundtruth whose
    # label differs from ``rlabel`` are zeroed out.
    overlaps = bboxes_jaccard(bboxes[i], gbboxes)
    same_label = tf.cast(tf.equal(glabels, rlabel), dtype=overlaps.dtype)
    overlaps = overlaps * same_label

    # Best-fitting groundtruth entry and whether it clears the threshold.
    best_idx = tf.cast(tf.argmax(overlaps, axis=0), tf.int32)
    is_match = overlaps[best_idx] > matching_threshold
    already_matched = gmatch[best_idx]
    keep = tf.logical_not(gdifficults[best_idx])

    # TP: match with a groundtruth that had no previous match.
    # FP: groundtruth already matched, or no match at all.
    # Difficult groundtruth records neither (both flags False).
    is_new_match = tf.logical_and(is_match, tf.logical_not(already_matched))
    ta_tp = ta_tp.write(i, tf.logical_and(keep, is_new_match))

    is_miss = tf.logical_or(already_matched, tf.logical_not(is_match))
    ta_fp = ta_fp.write(i, tf.logical_and(keep, is_miss))

    # Mark the matched groundtruth entry as used.
    newly_used = tf.logical_and(tf.equal(grange, best_idx),
                                tf.logical_and(keep, is_match))
    gmatch = tf.logical_or(gmatch, newly_used)

    return [i + 1, ta_tp, ta_fp, gmatch]
def get_scheduled_sample_inputs(self, done_warm_start, groundtruth_items, generated_items, scheduled_sampling_func):
    """Choose between ground-truth, generated and scheduled-sampled items.

    The choice is expressed as a ``tf.case`` over two predicates, listed in
    this order, with scheduled sampling as the default branch:

    * warm start not finished  -> the ground-truth items,
    * ``self.is_training`` false -> the generated items,
    * otherwise -> ``scheduled_sampling_func`` applied pairwise.

    Args:
        done_warm_start: boolean tensor, whether warm start has finished.
        groundtruth_items: list of ground-truth items.
        generated_items: list of generated items.
        scheduled_sampling_func: callable ``(item_gt, item_gen) -> item``
            that mixes a ground-truth item with a generated one.

    Returns:
        The result of ``tf.case``: a list mixing ground-truth and generated
        items.
    """
    def sample():
        """Apply the scheduled sampling function to each item pair."""
        with tf.variable_scope("scheduled_sampling", reuse=tf.AUTO_REUSE):
            return [
                scheduled_sampling_func(item_gt, item_gen)
                for item_gt, item_gen in zip(groundtruth_items,
                                             generated_items)
            ]

    still_warming_up = tf.logical_not(done_warm_start)
    not_training = tf.logical_not(self.is_training)
    branches = [
        (still_warming_up, lambda: groundtruth_items),
        (not_training, lambda: generated_items),
    ]
    return tf.case(branches, default=sample, strict=True)
def build_graph(self, nn_im_w, nn_im_h, num_colour_channels=3, weights=None, biases=None):
    """Build the three-conv-layer network, its loss and its metrics.

    Creates the placeholders ``self.x``, ``self.y_`` and ``self.threshold``,
    stacks three convolutional layers, one fully connected layer, dropout
    and a single-output readout layer, and defines an RMSE loss minimised
    with Adam (learning rate 1e-4).  Finally builds accuracy and
    confusion-matrix tensors and runs the variable initialiser in
    ``self.sess``.

    Args:
        nn_im_w: input image width in pixels.
        nn_im_h: input image height in pixels.
        num_colour_channels: number of channels of the input image.
        weights: optional list of 5 initial weight values (three conv
            layers, fully connected layer, readout layer); ``None`` entries
            are passed through to the layer builders.
        biases: optional list of 5 initial bias values, same layout as
            ``weights``.
    """
    num_outputs = 1
    self.nn_im_w = nn_im_w
    self.nn_im_h = nn_im_h

    if weights is None:
        weights = [None, None, None, None, None]
    if biases is None:
        biases = [None, None, None, None, None]

    # NOTE(review): the source formatting was lost, so the extent of this
    # device scope is reconstructed; it is assumed to cover the whole graph.
    with tf.device('/cpu:0'):
        # Placeholder variables for the input image and output images.
        # The flattened input length follows ``num_colour_channels`` so that
        # it agrees with the reshape below (it used to be hard-coded to 3).
        self.x = tf.placeholder(
            tf.float32,
            shape=[None, nn_im_w * nn_im_h * num_colour_channels])
        self.y_ = tf.placeholder(tf.float32, shape=[None, num_outputs])
        self.threshold = tf.placeholder(tf.float32)

        # Build the convolutional and pooling layers.
        conv1_output_channels = 32
        conv2_output_channels = 16
        conv3_output_channels = 8

        # The input image restored to [batch, height, width, channels].
        conv_layer_1_input = tf.reshape(
            self.x, [-1, nn_im_h, nn_im_w, num_colour_channels])

        # The layer builders are expected to record their outputs in
        # ``self.layers``; entry ``[k][0]`` is read back as the input of the
        # next stage.
        self.build_conv_layer(conv_layer_1_input,
                              num_colour_channels, conv1_output_channels,
                              initial_weights=weights[0],
                              initial_biases=biases[0])  # layer 1
        self.build_conv_layer(self.layers[0][0],
                              conv1_output_channels, conv2_output_channels,
                              initial_weights=weights[1],
                              initial_biases=biases[1])  # layer 2
        self.build_conv_layer(self.layers[1][0],
                              conv2_output_channels, conv3_output_channels,
                              initial_weights=weights[2],
                              initial_biases=biases[2])  # layer 3

        # Build the fully connected layer.
        # Assumes each of the three conv stages halves width and height
        # (hence the division by 8) -- TODO confirm in build_conv_layer.
        convnet_output_w = nn_im_w // 8
        convnet_output_h = nn_im_h // 8

        fully_connected_layer_input = tf.reshape(
            self.layers[2][0],
            [-1, convnet_output_w * convnet_output_h * conv3_output_channels])
        self.build_fully_connected_layer(fully_connected_layer_input,
                                         convnet_output_w, convnet_output_h,
                                         conv3_output_channels,
                                         initial_weights=weights[3],
                                         initial_biases=biases[3])

        # The dropout stage and readout layer.
        self.keep_prob, self.h_drop = self.dropout(self.layers[3][0])
        self.y_conv, _, _ = self.build_readout_layer(
            self.h_drop, num_outputs,
            initial_weights=weights[4], initial_biases=biases[4])

        # Root-mean-square error between target and prediction.
        self.mean_error = tf.sqrt(
            tf.reduce_mean(tf.square(self.y_ - self.y_conv)))
        self.train_step = tf.train.AdamOptimizer(1e-4).minimize(
            self.mean_error)

        self.accuracy = (
            1.0 - tf.reduce_mean(tf.abs(self.y_ - tf.round(self.y_conv))))

        # Targets are split at 0.5, predictions at the fed-in threshold.
        positive_examples = tf.greater_equal(self.y_, 0.5)
        negative_examples = tf.logical_not(positive_examples)
        positive_classifications = tf.greater_equal(self.y_conv,
                                                    self.threshold)
        negative_classifications = tf.logical_not(positive_classifications)

        # Positive and classified as positive.
        self.true_positive = tf.reduce_sum(tf.cast(
            tf.logical_and(positive_examples, positive_classifications),
            tf.int32))
        # Negative but classified as positive.
        self.false_positive = tf.reduce_sum(tf.cast(
            tf.logical_and(negative_examples, positive_classifications),
            tf.int32))
        # Negative and classified as negative.
        self.true_negative = tf.reduce_sum(tf.cast(
            tf.logical_and(negative_examples, negative_classifications),
            tf.int32))
        # Positive but classified as negative.
        self.false_negative = tf.reduce_sum(tf.cast(
            tf.logical_and(positive_examples, negative_classifications),
            tf.int32))

        self.positive_count = tf.reduce_sum(
            tf.cast(positive_examples, tf.int32))
        self.negative_count = tf.reduce_sum(
            tf.cast(negative_examples, tf.int32))

        # Layout: [[TP, FP], [FN, TN]].
        # NOTE(review): tf.pack / tf.initialize_all_variables are pre-1.0
        # TensorFlow names (later tf.stack / tf.global_variables_initializer);
        # kept as-is to match the TensorFlow version this code targets.
        self.confusion_matrix = tf.reshape(
            tf.pack([self.true_positive, self.false_positive,
                     self.false_negative, self.true_negative]),
            [2, 2])

        self.sess.run(tf.initialize_all_variables())
def compute_error(self):
    """Set up the mask attributes and run batch processing.

    Derives the concatenated number/word column tensors and the various
    selection masks from the ``self.batch_*`` inputs, stores them on
    ``self``, then calls ``self.batch_process()`` and stores its two results
    as ``self.final_error`` and ``self.final_correct``.

    Returns:
        ``self.final_error``.
    """
    pad_int = self.utility.FLAGS.pad_int
    bad_number = self.utility.FLAGS.bad_number_pre_process
    dummy_token_id = self.utility.dummy_token_id

    def first_row(mask):
        """Slice out index 0 along axis 1, as [batch_size, max_elements]."""
        row = tf.slice(mask, [0, 0, 0],
                       [self.batch_size, 1, self.max_elements])
        return tf.reshape(row, [self.batch_size, self.max_elements])

    self.batch_gold_select = self.batch_print_answer > 0.0

    # Number columns followed by word columns, concatenated along axis 1.
    self.full_column_mask = tf.concat(
        axis=1,
        values=[self.batch_number_column_mask, self.batch_word_column_mask])
    self.full_processed_column = tf.concat(
        axis=1,
        values=[self.batch_processed_number_column,
                self.batch_processed_word_column])
    self.full_processed_sorted_index_column = tf.concat(
        axis=1,
        values=[self.batch_processed_sorted_index_number_column,
                self.batch_processed_sorted_index_word_column])

    # 1 where the processed entry is neither padding nor a bad number.
    is_not_pad = tf.not_equal(self.full_processed_column, pad_int)
    is_not_bad = tf.not_equal(self.full_processed_column, bad_number)
    self.select_bad_number_mask = tf.cast(
        tf.logical_and(is_not_pad, is_not_bad), self.data_type)

    # 1 where a number entry is not padding / a word entry is not the
    # dummy token.
    self.select_mask = tf.cast(
        tf.logical_not(tf.equal(self.batch_number_column, pad_int)),
        self.data_type)
    self.select_word_mask = tf.cast(
        tf.logical_not(
            tf.equal(self.batch_word_column_entry_mask, dummy_token_id)),
        self.data_type)
    self.select_full_mask = tf.concat(
        axis=1, values=[self.select_mask, self.select_word_mask])

    # Element-wise maximum of the first slices of both masks.
    self.select_whole_mask = tf.maximum(
        first_row(self.select_mask), first_row(self.select_word_mask))

    # Complement of select_full_mask: 1 at padding / dummy-token entries.
    self.invert_select_full_mask = tf.cast(
        tf.concat(axis=1, values=[
            tf.equal(self.batch_number_column, pad_int),
            tf.equal(self.batch_word_column_entry_mask, dummy_token_id),
        ]),
        self.data_type)

    self.batch_lookup_answer = tf.zeros(tf.shape(self.batch_gold_select))
    self.reset_select = self.select_whole_mask
    self.rows = tf.reduce_sum(self.select_whole_mask, 1)
    self.num_entries = tf.reshape(
        tf.reduce_sum(tf.reduce_sum(self.select_full_mask, 1), 1),
        [self.batch_size])

    self.final_error, self.final_correct = self.batch_process()
    return self.final_error
def recall(self, y_):
    """Return the recall, TP / (TP + FN), of ``self.y`` against ``y_``.

    Both tensors are reduced with ``argmin`` along axis 1 and cast to bool,
    so a row counts as positive when its smallest entry is not at index 0
    (for two-column scores: when column 1 holds the smaller value).

    The previous implementation derived the "false negatives" as
    ``N - TN``, which is the number of false positives, and therefore
    returned precision instead of recall.

    Args:
        y_: ground-truth tensor with the classes along axis 1.

    Returns:
        Scalar float32 tensor ``TP / P``.  The result is NaN when the batch
        contains no positive example (0 / 0).
    """
    # True stands for positive, False for negative.
    y_true = tf.cast(tf.argmin(y_, 1), tf.bool)
    y_pred = tf.cast(tf.argmin(self.y, 1), tf.bool)

    # Positives that were also predicted positive.
    tp = tf.reduce_sum(tf.cast(tf.logical_and(y_true, y_pred), tf.float32))
    # All real positives: P = TP + FN, i.e. the "relevant" examples.
    relevant = tf.reduce_sum(tf.cast(y_true, tf.float32))

    return tf.div(tp, relevant)
def NLL(self, y, lengths, pis, mus, sigmas, rho, es, eps=1e-8):
    """Negative log-likelihood of a bivariate GMM plus a Bernoulli term.

    ``y`` is split along axis 2 into two coordinates (modelled by the
    Gaussian mixture) and a third binary channel (modelled by the Bernoulli
    probability ``es``).  Positions beyond ``lengths`` and positions whose
    loss is NaN are excluded from both averages.

    Args:
        y: targets, split into 3 equal parts along axis 2; axis 1 is the
            time axis (its size is used as ``maxlen`` of the sequence mask).
        lengths: valid length of each sequence.
        pis: mixture weights, multiplied with the component likelihoods and
            summed over axis 2.
        mus: component means, split into 2 along axis 2.
        sigmas: component standard deviations, split into 2 along axis 2.
        rho: component correlations.
        es: Bernoulli probability that the third channel equals 1.
        eps: lower clip applied to the mixture likelihood before the log.

    Returns:
        ``(sequence_loss, element_loss)``: the per-sequence mean loss and
        the mean loss over all valid positions of the batch.
    """
    sigma_1, sigma_2 = tf.split(sigmas, 2, axis=2)
    y_1, y_2, y_3 = tf.split(y, 3, axis=2)
    mu_1, mu_2 = tf.split(mus, 2, axis=2)

    # Bivariate normal density of every mixture component.
    one_minus_rho_sq = 1 - tf.square(rho)
    norm = 1.0 / (2 * np.pi * sigma_1 * sigma_2 * tf.sqrt(one_minus_rho_sq))
    Z = tf.square((y_1 - mu_1) / (sigma_1)) + \
        tf.square((y_2 - mu_2) / (sigma_2)) - \
        2 * rho * (y_1 - mu_1) * (y_2 - mu_2) / (sigma_1 * sigma_2)
    exp = -1.0 * Z / (2 * one_minus_rho_sq)
    gaussian_likelihoods = tf.exp(exp) * norm

    gmm_likelihood = tf.reduce_sum(pis * gaussian_likelihoods, 2)
    gmm_likelihood = tf.clip_by_value(gmm_likelihood, eps, np.inf)

    # Squeeze only the channel axis.  An unqualified squeeze would also
    # drop a size-1 batch or time dimension, after which the sum with the
    # [batch, time] mixture term broadcasts to the wrong shape.
    bernoulli_likelihood = tf.squeeze(
        tf.where(tf.equal(tf.ones_like(y_3), y_3), es, 1 - es), axis=2)

    nll = -(tf.log(gmm_likelihood) + tf.log(bernoulli_likelihood))

    # Valid = inside the sequence length and not NaN.
    sequence_mask = tf.logical_and(
        tf.sequence_mask(lengths, maxlen=tf.shape(y)[1]),
        tf.logical_not(tf.is_nan(nll)),
    )
    nll = tf.where(sequence_mask, nll, tf.zeros_like(nll))
    num_valid = tf.reduce_sum(tf.cast(sequence_mask, tf.float32), axis=1)

    # The maximum(…, 1.0) guards against division by zero when nothing is
    # valid.
    sequence_loss = tf.reduce_sum(nll, axis=1) / tf.maximum(num_valid, 1.0)
    element_loss = tf.reduce_sum(nll) / tf.maximum(
        tf.reduce_sum(num_valid), 1.0)
    return sequence_loss, element_loss
def prune_outside_window(boxlist, window, scope=None):
    """Drop every box that is not entirely inside ``window``.

    A box is removed as soon as any of its edges lies beyond the matching
    window edge, i.e. even when it only partially falls outside.  Nothing
    is clipped.

    Args:
        boxlist: a BoxList holding M_in boxes; ``boxlist.get()`` is split
            into ``ymin, xmin, ymax, xmax`` columns.
        window: a float tensor of shape [4] representing
            [ymin, xmin, ymax, xmax] of the window.
        scope: name scope.

    Returns:
        A pair ``(pruned, valid_indices)``: the result of
        ``gather(boxlist, valid_indices)`` and a 1-D tensor with the indices
        of the boxes that were kept.
    """
    with tf.name_scope(scope, 'PruneOutsideWindow'):
        box_y_min, box_x_min, box_y_max, box_x_max = tf.split(
            value=boxlist.get(), num_or_size_splits=4, axis=1)
        win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)

        # One boolean column per edge that sticks out of the window.
        sticks_out = tf.concat(
            [
                tf.less(box_y_min, win_y_min),
                tf.less(box_x_min, win_x_min),
                tf.greater(box_y_max, win_y_max),
                tf.greater(box_x_max, win_x_max),
            ],
            1)
        is_inside = tf.logical_not(tf.reduce_any(sticks_out, 1))
        valid_indices = tf.reshape(tf.where(is_inside), [-1])
        return gather(boxlist, valid_indices), valid_indices
def loss_estimate(batch_size,old_state,data,total_data,model_params,base_mean,base_log_var):
    """Estimate the full-data loss from one batch, plus the KL term.

    NaN-masked entries are replaced by zeros on input and contribute zero
    loss.  The summed batch loss is rescaled by ``total_data / data_count``
    (``data_count`` being the number of non-masked entries) and the KL
    divergence reported by ``model_params`` is added.

    Args:
        batch_size: passed to ``model_params.init_state`` / ``sample_vals``.
        old_state: state carried over from the previous call.
        data: triple ``(clipped_log_vals, nan_mask, reset_rows)``.
        total_data: total amount of data the batch loss is scaled up to.
        model_params: object providing ``init_state``, ``sample_vals`` and
            ``get_divergence``.
        base_mean: base mean of the Gaussian output.
        base_log_var: log-variance of the Gaussian output.

    Returns:
        ``(loss_estimate, new_state, kl_divergence)``.
    """
    clipped_log_vals, nan_mask, reset_rows = data
    zeros = tf.zeros_like(clipped_log_vals)
    fresh_state = model_params.init_state(batch_size)
    data_count = tf.reduce_sum(
        tf.to_float(tf.logical_not(nan_mask)), name='data_count')

    # Masked entries are fed (and targeted) as zeros.
    model_input = tf.select(nan_mask, zeros, clipped_log_vals)
    target_outputs = model_input
    sample_params = model_params.sample_vals(batch_size)

    # TODO verify significance of old_state
    # NOTE(review): where reset_rows is true this keeps old_state and
    # otherwise takes the freshly initialised state -- confirm this is the
    # intended polarity of reset_rows.
    filtered_state = tf.select(reset_rows, old_state, fresh_state)
    new_state, delta_mean = sample_inference(
        filtered_state, model_input, sample_params)

    variance = tf.exp(base_log_var)
    mean = base_mean + delta_mean * variance

    per_entry_loss = gaussian_neg_log_likelyhood(
        target_outputs, mean, variance)
    batch_loss = tf.reduce_sum(tf.select(nan_mask, zeros, per_entry_loss))

    kl_divergence = model_params.get_divergence()
    scaled_loss = batch_loss * (total_data / data_count) + kl_divergence
    return scaled_loss, new_state, kl_divergence
def build_graph(self, image, label):
    """Build the residual network and return its total training cost.

    The image is normalised per channel, transposed to channels-first and
    fed through an initial convolution, the residual modules described by
    ``MODULE_SIZES`` / ``FILTER_SIZES``, global average pooling and a linear
    classifier.  The cost is the mean softmax cross entropy plus L2 weight
    decay; the training error and the cross entropy are added as moving
    summaries.

    Args:
        image: batch of images, channels last (it is transposed with
            permutation [0, 3, 1, 2]); values are divided by 255.
        label: per-class label scores (the argmax over axis 1 is used as
            the class index).

    Returns:
        Tensor named ``cost``: cross entropy + weight decay.
    """
    # The channels-first layout used below requires a GPU.
    assert tf.test.is_gpu_available()

    channel_mean = tf.constant([0.4914, 0.4822, 0.4465], dtype=tf.float32)
    channel_std = tf.constant([0.2023, 0.1994, 0.2010], dtype=tf.float32)
    image = ((image / 255.0) - channel_mean) / channel_std
    image = tf.transpose(image, [0, 3, 1, 2])

    pytorch_default_init = tf.variance_scaling_initializer(
        scale=1.0 / 3, mode='fan_in', distribution='uniform')

    with argscope([Conv2D, BatchNorm, GlobalAvgPooling],
                  data_format='channels_first'), \
            argscope(Conv2D, kernel_initializer=pytorch_default_init):
        net = Conv2D('conv0', image, 64,
                     kernel_size=3, strides=1, use_bias=False)
        for module_idx, num_blocks in enumerate(MODULE_SIZES):
            for block_idx in range(num_blocks):
                # Stride 2 in the first block of every module but the first.
                if block_idx == 0 and module_idx > 0:
                    stride = 2
                else:
                    stride = 1
                scope_name = "res%d.%d" % (module_idx, block_idx)
                with tf.variable_scope(scope_name):
                    net = preactivation_block(
                        net, FILTER_SIZES[module_idx], stride)
        net = GlobalAvgPooling('gap', net)
        logits = FullyConnected(
            'linear', net, CLASS_NUM,
            kernel_initializer=tf.random_normal_initializer(stddev=1e-3))

    ce_cost = tf.nn.softmax_cross_entropy_with_logits(
        labels=label, logits=logits)
    ce_cost = tf.reduce_mean(ce_cost, name='cross_entropy_loss')

    # 1.0 where the top-1 prediction misses the label, else 0.0.
    single_label = tf.to_int32(tf.argmax(label, axis=1))
    is_hit = tf.nn.in_top_k(logits, single_label, 1)
    wrong = tf.to_float(tf.logical_not(is_hit), name='wrong_vector')

    # Monitor training error.
    add_moving_summary(tf.reduce_mean(wrong, name='train_error'), ce_cost)
    add_param_summary(('.*/W', ['histogram']))

    # Weight decay on everything matched by '.*', including the
    # convolutional layers.
    wd_cost = tf.multiply(
        WEIGHT_DECAY, regularize_cost('.*', tf.nn.l2_loss), name='wd_cost')
    return tf.add_n([ce_cost, wd_cost], name='cost')
def build_graph(self, image, label):
    """Build two spatial transformers plus a classifier; return the cost.

    Two independent localisation networks (variable scopes ``STN1`` and
    ``STN2``) each predict a 2x3 affine transform and sample a
    ``WARP_TARGET_SIZE`` square patch from the image.  The two patches are
    concatenated along the channel axis and classified into 19 classes.

    Args:
        image: batch of two-channel images (channels 0 and 1 are used for
            the visualisation); values are divided by 255.
        label: integer class labels.

    Returns:
        Tensor named ``cost``: cross entropy + L2 regularisation.
    """
    # Homogeneous (y, x, 1) coordinates of every target pixel: p x 3.
    grid = np.array(
        [(y, x, 1)
         for y in range(WARP_TARGET_SIZE)
         for x in range(WARP_TARGET_SIZE)],
        dtype='float32')
    xys = tf.constant(grid, dtype=tf.float32, name='xys')

    image = image / 255.0 - 0.5  # b h w 2

    def get_stn(image):
        """Predict an affine transform and sample the warped patch."""
        # The last layer outputs the 6 parameters of the affine transform;
        # its bias is initialised to [1, 0, HALF_DIFF, 0, 1, HALF_DIFF].
        affine_bias = tf.constant_initializer(
            [1, 0, HALF_DIFF, 0, 1, HALF_DIFF])
        params = (LinearWrap(image)
                  .AvgPooling('downsample', 2)
                  .Conv2D('conv0', 20, 5, padding='VALID')
                  .MaxPooling('pool0', 2)
                  .Conv2D('conv1', 20, 5, padding='VALID')
                  .FullyConnected('fc1', 32)
                  .FullyConnected('fct', 6, activation=tf.identity,
                                  kernel_initializer=tf.constant_initializer(),
                                  bias_initializer=affine_bias)())
        params = tf.reshape(params, [-1, 2, 3], name='affine')  # b x 2 x 3
        params = tf.reshape(
            tf.transpose(params, [2, 0, 1]), [3, -1])  # 3 x (b x 2)
        coords = tf.reshape(
            tf.matmul(xys, params),
            [WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2])
        coords = tf.transpose(
            coords, [2, 0, 1, 3], 'sampled_coords')  # b h w 2
        return BilinearSample('warp', [image, coords], borderMode='constant')

    with argscope([Conv2D, FullyConnected], activation=tf.nn.relu):
        with tf.variable_scope('STN1'):
            sampled1 = get_stn(image)
        with tf.variable_scope('STN2'):
            sampled2 = get_stn(image)

    # For visualization in tensorboard.
    with tf.name_scope('visualization'):
        border = [[0, 0],
                  [HALF_DIFF, HALF_DIFF],
                  [HALF_DIFF, HALF_DIFF],
                  [0, 0]]
        padded1 = tf.pad(sampled1, border)
        padded2 = tf.pad(sampled2, border)
        # The two channels are stacked along axis 1: b x 2h x w.
        img_orig = tf.concat([image[:, :, :, 0], image[:, :, :, 1]], 1)
        transform1 = tf.concat(
            [padded1[:, :, :, 0], padded1[:, :, :, 1]], 1)
        transform2 = tf.concat(
            [padded2[:, :, :, 0], padded2[:, :, :, 1]], 1)
        stacked = tf.concat([img_orig, transform1, transform2], 2, 'viz')
        tf.summary.image(
            'visualize', tf.expand_dims(stacked, -1), max_outputs=30)

    sampled = tf.concat([sampled1, sampled2], 3, 'sampled_concat')
    logits = (LinearWrap(sampled)
              .FullyConnected('fc1', 256, activation=tf.nn.relu)
              .FullyConnected('fc2', 128, activation=tf.nn.relu)
              .FullyConnected('fct', 19, activation=tf.identity)())
    # Created only so that a tensor named 'prob' exists in the graph.
    tf.nn.softmax(logits, name='prob')

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')

    # 1.0 where the top-1 prediction misses the label, else 0.0.
    is_hit = tf.nn.in_top_k(logits, label, 1)
    wrong = tf.to_float(tf.logical_not(is_hit), name='incorrect_vector')
    summary.add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

    wd_cost = tf.multiply(
        1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
        name='regularize_loss')
    summary.add_moving_summary(cost, wd_cost)
    return tf.add_n([wd_cost, cost], name='cost')
def next_state(self, loop_state: LoopState) -> Tuple[tf.Tensor, Any, Any]:
    """Run the decoder layers on the extended input and return the new state.

    The current embedded input and the "not finished" mask are appended to
    the input sequence and input mask stored in the feedables; the stack is
    then evaluated up to ``self.depth`` and the state at the last time
    position is returned.

    Args:
        loop_state: the current decoding loop state.

    Returns:
        ``(output_state, new_feedables, new_histories)``.  The histories
        are passed through unchanged.
    """
    feedables = loop_state.feedables
    tr_feedables = feedables.other
    tr_histories = loop_state.histories.other

    with tf.variable_scope(self._variable_scope, reuse=tf.AUTO_REUSE):
        # Extend the decoded sequence along axis 1 with the newest input.
        input_sequence = append_tensor(
            tr_feedables.input_sequence, feedables.embedded_input, 1)

        # 1.0 for hypotheses that are still being decoded, 0.0 otherwise.
        still_running = tf.to_float(tf.logical_not(feedables.finished))
        input_mask = append_tensor(
            tr_feedables.input_mask,
            tf.expand_dims(still_running, -1),
            axis=1)

        last_layer = self.layer(
            self.depth, input_sequence, tf.squeeze(input_mask, -1))

        # State at the last time position: (batch, state_size).
        output_state = last_layer.temporal_states[:, -1, :]

    new_feedables = TransformerFeedables(
        input_sequence=input_sequence,
        input_mask=input_mask)

    # TODO: do something more interesting here
    new_histories = tr_histories

    return (output_state, new_feedables, new_histories)
def prune_completely_outside_window(boxlist, window, scope=None):
    """Drop every box that lies completely outside ``window``.

    A box is removed when it starts at or beyond the far window edge, or
    ends at or before the near window edge, on either axis.  Boxes that
    merely overflow the window are kept and are not clipped.

    Args:
        boxlist: a BoxList holding M_in boxes; ``boxlist.get()`` is split
            into ``ymin, xmin, ymax, xmax`` columns.
        window: a float tensor of shape [4] representing
            [ymin, xmin, ymax, xmax] of the window.
        scope: name scope.

    Returns:
        A pair ``(pruned, valid_indices)``: the result of
        ``gather(boxlist, valid_indices)`` and a 1-D tensor with the indices
        of the boxes that were kept.
    """
    with tf.name_scope(scope, 'PruneCompleteleyOutsideWindow'):
        box_y_min, box_x_min, box_y_max, box_x_max = tf.split(
            value=boxlist.get(), num_or_size_splits=4, axis=1)
        win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)

        # One boolean column per way of missing the window entirely.
        misses_window = tf.concat(
            [
                tf.greater_equal(box_y_min, win_y_max),
                tf.greater_equal(box_x_min, win_x_max),
                tf.less_equal(box_y_max, win_y_min),
                tf.less_equal(box_x_max, win_x_min),
            ],
            1)
        touches_window = tf.logical_not(tf.reduce_any(misses_window, 1))
        valid_indices = tf.reshape(tf.where(touches_window), [-1])
        return gather(boxlist, valid_indices), valid_indices
def _has_foreground_and_background_in_first_frame(label, subsampling_factor):
    """Check that the first frame has enough foreground and background.

    The first frame of ``label`` is downscaled with nearest-neighbour
    resizing by ``subsampling_factor``; pixels equal to 0 count as
    background, all others as foreground.

    Args:
        label: Label tensor of shape [num_frames, height, width, 1].
        subsampling_factor: Integer, the subsampling factor.

    Returns:
        Boolean tensor, true when both the foreground and the background
        pixel count of the downscaled first frame reach MIN_LABEL_COUNT.
    """
    height, width = train_utils.resolve_shape(label)[1:3]
    target_size = [height // subsampling_factor, width // subsampling_factor]
    first_frame = tf.squeeze(
        tf.image.resize_nearest_neighbor(
            label[0, tf.newaxis], target_size, align_corners=True),
        axis=0)

    background = tf.equal(first_frame, 0)
    foreground = tf.logical_not(background)

    # A plain reduce_any was not robust enough, so require at least
    # MIN_LABEL_COUNT pixels of each kind.
    num_foreground = tf.reduce_sum(tf.cast(foreground, tf.int32))
    num_background = tf.reduce_sum(tf.cast(background, tf.int32))
    enough_foreground = tf.greater_equal(num_foreground, MIN_LABEL_COUNT)
    enough_background = tf.greater_equal(num_background, MIN_LABEL_COUNT)
    return tf.logical_and(enough_foreground, enough_background)
def loop_continue_criterion(self, *args) -> tf.Tensor:
    """Decide whether the beam search while loop should keep running.

    The loop continues while the step limit has not been reached and at
    least one hypothesis is unfinished.  The step compared against
    ``self.max_steps`` is the decoder step minus one, because this function
    is evaluated after the decoder step has been called and its counter
    incremented (the decoder body is called at the end of the beam body
    function, which in turn supports ensembling).

    Arguments:
        args: The fields of a ``BeamSearchLoopState`` instance.

    Returns:
        A scalar boolean ``Tensor``.
    """
    state = BeamSearchLoopState(*args)

    steps_done = state.decoder_loop_state.feedables.step - 1
    finished = state.search_state.finished

    below_step_limit = tf.less(steps_done, self.max_steps)
    some_unfinished = tf.logical_not(tf.reduce_all(finished))
    return tf.logical_and(below_step_limit, some_unfinished)
def _match(self, similarity_matrix, valid_rows):
    """Greedy bipartite matching of rows to columns.

    The rows are reordered so that all valid rows come first, the
    similarities are negated into distances, and
    ``image_ops.bipartite_match`` is run restricted to the number of valid
    rows.

    TODO(rathodv): Add num_valid_columns options to match only that many
    columns with all the rows.

    Args:
        similarity_matrix: Float tensor of shape [N, M] with pairwise
            similarity where higher values mean more similar.
        valid_rows: A boolean tensor of shape [N] indicating the rows that
            are valid.

    Returns:
        match_results: int32 tensor of shape [M] with match_results[i]=-1
            meaning that column i is not matched and otherwise that it is
            matched to row match_results[i].
    """
    valid_idx = tf.squeeze(tf.where(valid_rows), axis=-1)
    valid_part = tf.gather(similarity_matrix, valid_idx)
    invalid_idx = tf.squeeze(tf.where(tf.logical_not(valid_rows)), axis=-1)
    invalid_part = tf.gather(similarity_matrix, invalid_idx)
    reordered = tf.concat([valid_part, invalid_part], axis=0)

    # bipartite_match looks for minimum-distance matches, so flip the sign.
    distance_matrix = -1 * reordered
    num_valid_rows = tf.reduce_sum(tf.to_float(valid_rows))

    _, match_results = image_ops.bipartite_match(
        distance_matrix, num_valid_rows=num_valid_rows)
    flat_results = tf.reshape(match_results, [-1])
    return tf.cast(flat_results, tf.int32)
def _has_foreground_and_background_in_first_frame_2(label, decoder_output_stride):
    """Check that the first frame has enough foreground and background.

    Second attempt: the first frame is resized to the actual decoder output
    dimension, obtained from ``model.scale_dimension`` with a scale of
    ``1 / decoder_output_stride``.  Pixels equal to 0 count as background,
    all others as foreground.

    Args:
        label: Label tensor of shape [num_frames, height, width, 1].
        decoder_output_stride: Integer, the stride of the decoder output.

    Returns:
        Boolean tensor, true when both the foreground and the background
        pixel count of the downscaled first frame reach MIN_LABEL_COUNT.
    """
    height, width = train_utils.resolve_shape(label)[1:3]
    scale = 1.0 / decoder_output_stride
    height_sub = model.scale_dimension(height, scale)
    width_sub = model.scale_dimension(width, scale)
    first_frame = tf.squeeze(
        tf.image.resize_nearest_neighbor(
            label[0, tf.newaxis], [height_sub, width_sub],
            align_corners=True),
        axis=0)

    background = tf.equal(first_frame, 0)
    foreground = tf.logical_not(background)

    # A plain reduce_any was not robust enough, so require at least
    # MIN_LABEL_COUNT pixels of each kind.
    num_foreground = tf.reduce_sum(tf.cast(foreground, tf.int32))
    num_background = tf.reduce_sum(tf.cast(background, tf.int32))
    enough_foreground = tf.greater_equal(num_foreground, MIN_LABEL_COUNT)
    enough_background = tf.greater_equal(num_background, MIN_LABEL_COUNT)
    return tf.logical_and(enough_foreground, enough_background)
def __loss__(self):
    """Build the discriminator and generator losses and their summaries.

    Sets ``self.d_loss_real``, ``self.d_loss_fake``, ``self.d_loss``,
    ``self.context_loss`` and ``self.g_loss``.

    * Discriminator loss: binary cross entropy of ``self.predict_d``
      against ones plus that of ``self.predict_d_for_g`` against zeros,
      plus ``self.reg_w`` times the summed regularisation values (if any).
    * Generator loss: ``self.adv_loss_w`` times the binary cross entropy of
      ``self.predict_d_for_g`` against ones, plus
      ``self.FLAGS.gen_loss_context`` times the context loss, plus the
      generator regularisation (if any).
    * Context loss: mean squared real + imaginary difference between
      prediction and label, restricted to positions where
      ``self.labels['mask']`` is zero.

    Summaries are filed in the graph collections ``'D'`` (discriminator)
    and ``'G'`` (generator).  ``collections`` is passed as a list, as the
    summary API expects.  The generator's regularisation-only summary is
    now filed under ``'G'``; it used to land in ``'D'``.

    :return:
    """
    d_collection = ['D']
    g_collection = ['G']

    # --- Discriminator loss -------------------------------------------
    self.d_loss_real = tf.reduce_mean(ops.binary_cross_entropy(
        preds=self.predict_d,
        targets=tf.ones_like(self.predict_d)))
    tf.summary.scalar('d_loss_real', self.d_loss_real,
                      collections=d_collection)

    self.d_loss_fake = tf.reduce_mean(ops.binary_cross_entropy(
        preds=self.predict_d_for_g,
        targets=tf.zeros_like(self.predict_d_for_g)))
    tf.summary.scalar('d_loss_fake', self.d_loss_fake,
                      collections=d_collection)

    self.d_loss = self.d_loss_real + self.d_loss_fake
    tf.summary.scalar('d_loss', self.d_loss, collections=d_collection)

    if len(self.regularization_values_d) > 0:
        reg_loss_d = self.reg_w * tf.reduce_sum(self.regularization_values_d)
        self.d_loss += reg_loss_d
        if self.FLAGS.dump_debug:
            tf.summary.scalar('d_loss_plus_reg', self.d_loss,
                              collections=d_collection)
            tf.summary.scalar('d_loss_reg_only', reg_loss_d,
                              collections=d_collection)

    # --- Generative (adversarial) loss --------------------------------
    g_loss = tf.reduce_mean(ops.binary_cross_entropy(
        preds=self.predict_d_for_g,
        targets=tf.ones_like(self.predict_d_for_g)))
    tf.summary.scalar('g_loss', g_loss, collections=g_collection)

    # --- Context loss -------------------------------------------------
    # Inverted mask: 1.0 where labels['mask'] is zero, 0.0 elsewhere.
    mask_not = tf.cast(
        tf.logical_not(tf.cast(self.labels['mask'], tf.bool)), tf.float32)
    real_diff = tf.contrib.layers.flatten(tf.multiply(
        self.predict_g['real'] - self.labels['real'], mask_not))
    imag_diff = tf.contrib.layers.flatten(tf.multiply(
        self.predict_g['imag'] - self.labels['imag'], mask_not))
    self.context_loss = tf.reduce_mean(
        tf.square(real_diff) + tf.square(imag_diff),
        name='Context_loss_mean')
    tf.summary.scalar('g_loss_context_only', self.context_loss,
                      collections=g_collection)

    self.g_loss = (self.adv_loss_w * g_loss
                   + self.FLAGS.gen_loss_context * self.context_loss)
    tf.summary.scalar('g_loss_plus_context', self.g_loss,
                      collections=g_collection)

    if len(self.regularization_values) > 0:
        reg_loss_g = self.reg_w * tf.reduce_sum(self.regularization_values)
        self.g_loss += reg_loss_g
        if self.FLAGS.dump_debug:
            tf.summary.scalar('g_loss_plus_context_plus_reg', self.g_loss,
                              collections=g_collection)
            # Generator summary: belongs to 'G' like its siblings.
            tf.summary.scalar('g_loss_reg_only', reg_loss_g,
                              collections=g_collection)

    # NOTE(review): the source formatting was lost; this summary is assumed
    # to be unconditional (function level) -- confirm.
    tf.summary.scalar('diff-loss', tf.abs(self.d_loss - self.g_loss),
                      collections=g_collection)
def filter_groundtruth_with_nan_box_coordinates(tensor_dict):
    """Remove groundtruth entries whose box contains a NaN coordinate.

    Args:
        tensor_dict: a dictionary of following groundtruth tensors -
            fields.InputDataFields.groundtruth_boxes
            fields.InputDataFields.groundtruth_classes
            fields.InputDataFields.groundtruth_confidences
            fields.InputDataFields.groundtruth_keypoints
            fields.InputDataFields.groundtruth_instance_masks
            fields.InputDataFields.groundtruth_is_crowd
            fields.InputDataFields.groundtruth_area
            fields.InputDataFields.groundtruth_label_types

    Returns:
        The result of ``retain_groundtruth`` for the entries whose box
        coordinates are all non-NaN.
    """
    boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]

    # Number of NaN coordinates per box (summed over axis 1).
    nan_count_per_box = tf.reduce_sum(
        tf.to_int32(tf.is_nan(boxes)), reduction_indices=[1])
    has_nan = tf.greater(nan_count_per_box, 0)

    keep_indices = tf.where(tf.logical_not(has_nan))
    return retain_groundtruth(tensor_dict, keep_indices)
def prediction_incorrect(logits, label, topk=1):
    """Flag every sample whose label is not among the top-k predictions.

    :param logits: NxC
    :param label: N
    :param topk: number of highest-scoring classes that count as a hit
    :returns: a float32 vector of length N with 0/1 values, 1 meaning
        incorrect prediction
    """
    is_hit = tf.nn.in_top_k(logits, label, topk)
    is_miss = tf.logical_not(is_hit)
    return tf.cast(is_miss, tf.float32)
def IoU(bbox, gt):
    """Intersection over union of one box with every groundtruth box.

    Boxes are given as ``[x, y, w, h]`` with ``(x, y)`` the upper-left
    corner.

    The intersection width and height are clamped at zero.  Without the
    clamp, a pair of boxes that is disjoint on *both* axes has a negative
    width and a negative height, whose product is a positive "intersection"
    and hence a bogus positive IoU.

    Args:
        bbox: a single box ``[x, y, w, h]``.
        gt: groundtruth boxes, one ``[x, y, w, h]`` row per box.

    Returns:
        float32 tensor of shape [num_gt, 1] with the IoU of ``bbox`` and
        each groundtruth box; negative values are zeroed.
    """
    column = [-1, 1]

    def as_column(values):
        """Cast to float32 and reshape into a [num_gt, 1] column."""
        return tf.reshape(tf.cast(values, tf.float32), column)

    # Corners of the intersection rectangle.
    x1 = tf.maximum(tf.cast(bbox[0], tf.float32), as_column(gt[:, 0]))
    y1 = tf.maximum(tf.cast(bbox[1], tf.float32), as_column(gt[:, 1]))
    x2 = tf.minimum(tf.cast(bbox[2] + bbox[0], tf.float32),
                    as_column(gt[:, 2] + gt[:, 0]))
    y2 = tf.minimum(tf.cast(bbox[3] + bbox[1], tf.float32),
                    as_column(gt[:, 3] + gt[:, 1]))

    # Disjoint boxes have an empty intersection, not a negative extent.
    inter_w = tf.maximum(x2 - x1, 0.0)
    inter_h = tf.maximum(y2 - y1, 0.0)
    inter = inter_w * inter_h

    bounding_box = tf.cast(bbox[2] * bbox[3], tf.float32)
    ground_truth = as_column(gt[:, 2] * gt[:, 3])

    iou = inter / (bounding_box + ground_truth - inter)

    # Keep the lower bound of 0 (relevant only for degenerate boxes with a
    # negative width or height).
    non_negative = tf.cast(
        tf.logical_not(tf.less(iou, tf.zeros_like(iou))), tf.float32)
    return iou * non_negative
def _inverse_log_det_jacobian(self, y, use_saved_statistics=False):
    """Return log(gamma) summed minus half the summed log-variance.

    The variance is the minibatch variance of `y` unless
    `use_saved_statistics` is set or `self._training` is false, in which case
    the batchnorm layer's moving variance is used instead.
    """
    if not self.batchnorm.built:
        # Create variables.
        self.batchnorm.build(y.shape)

    event_dims = self.batchnorm.axis
    reduction_axes = [
        axis for axis in range(len(y.shape)) if axis not in event_dims]

    # At training-time, ildj is computed from the mean and log-variance across
    # the current minibatch.
    # A 0/1 float weight is used instead of tf.where() to get easier
    # broadcasting.
    saved_weight = tf.cast(
        tf.logical_or(use_saved_statistics, tf.logical_not(self._training)),
        tf.float32)
    _, batch_variance = tf.nn.moments(y, axes=reduction_axes, keep_dims=True)
    selected_variance = (
        (1 - saved_weight) * batch_variance
        + saved_weight * self.batchnorm.moving_variance
        + self.batchnorm.epsilon)
    log_variance = tf.log(selected_variance)

    # `gamma` and `log Var(y)` reductions over event_dims.
    # Log(total change in area from gamma term).
    log_total_gamma = tf.reduce_sum(tf.log(self.batchnorm.gamma))

    # Log(total change in area from log-variance term).
    log_total_variance = tf.reduce_sum(log_variance)

    # The ildj is scalar, as it does not depend on the values of x and is
    # constant across minibatch elements.
    return log_total_gamma - 0.5 * log_total_variance
def aggregate_single_gradient(grad_and_vars, use_mean, check_inf_nan):
    """Combine the per-tower gradients of one shared variable.

    Note that this function provides a synchronization point across all
    towers.

    Args:
      grad_and_vars: A list or tuple of (gradient, variable) tuples, one pair
        per tower, all referring to the same shared variable.
      use_mean: if True, mean is taken, else sum of gradients is taken.
      check_inf_nan: check grads for nans and infs.

    Returns:
      The tuple ((combined_gradient, variable), has_nan_or_inf). The variable
      is taken from the first tower. has_nan_or_inf is a boolean tensor when
      check_inf_nan is set, otherwise None.
    """
    tower_grads = [grad for grad, _unused_var in grad_and_vars]
    num_towers = len(tower_grads)

    combined = tf.add_n(tower_grads)
    if use_mean and num_towers > 1:
        combined = tf.multiply(combined, 1.0 / num_towers)

    shared_var = grad_and_vars[0][1]

    has_nan_or_inf = None
    if check_inf_nan:
        all_finite = tf.reduce_all(tf.is_finite(tower_grads))
        has_nan_or_inf = tf.logical_not(all_finite)
    return (combined, shared_var), has_nan_or_inf
def read_record(filename_queue):
    """Read one fixed-length record and split it into an image and a mask.

    A record is `img_len` uint8 bytes of image followed by `mask_len` uint8
    bytes of mask, both laid out as (mask_height, mask_width).

    Returns an object with the attributes mask_height, mask_width, mask_depth,
    img_depth, key, image and mask. `image` is the single plane repeated over
    three channels as float32; `mask` has two float32 channels, the boolean
    mask and its negation.
    """
    class FCNRecord(object):
        pass

    record = FCNRecord()
    record.mask_height = int(420/DOWNSAMPLE_FACTOR)
    record.mask_width = int(580/DOWNSAMPLE_FACTOR)
    record.mask_depth = 1
    record.img_depth = 1

    plane_size = record.mask_height*record.mask_width
    img_len = plane_size*record.img_depth
    mask_len = plane_size*record.mask_depth

    reader = tf.FixedLengthRecordReader(record_bytes=img_len + mask_len)
    record.key, value = reader.read(filename_queue)
    record_bytes = tf.decode_raw(value, tf.uint8)

    # Image part: one plane, replicated to three channels, channels last.
    image_bytes = tf.slice(record_bytes, [0], [img_len])
    image_plane = tf.reshape(
        image_bytes, [record.mask_height, record.mask_width])
    image_planes = tf.pack([image_plane, image_plane, image_plane])
    record.image = tf.cast(tf.transpose(image_planes, (1, 2, 0)), tf.float32)

    # Mask part: boolean plane plus its complement, channels last.
    mask_bytes = tf.slice(record_bytes, [img_len], [mask_len])
    mask_plane = tf.cast(
        tf.reshape(mask_bytes, [record.mask_height, record.mask_width]),
        tf.bool)
    mask_planes = tf.pack([mask_plane, tf.logical_not(mask_plane)])
    record.mask = tf.cast(tf.transpose(mask_planes, (1, 2, 0)), tf.float32)
    return record
def tf_format_mnist_images(X, Y, Y_, n=100, lines=10):
    """Assemble `n` digit images into one tiled uint8 image, errors first.

    Incorrectly recognised images are placed before correctly recognised
    ones. Incorrect ones get a red background and have the correct and the
    computed digit stencilled on them; correct ones keep a white background.

    X holds the images, Y the computed outputs and Y_ the correct outputs
    (argmax over axis 1 of each gives the digit). The tile has `lines` rows
    of n//lines images each.
    """
    correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
    # indices of correctly / incorrectly recognised images
    hit_indices = tf.squeeze(tf.where(correct_prediction), [1])
    miss_indices = tf.squeeze(
        tf.where(tf.logical_not(correct_prediction)), [1])
    # reorder with the unrecognised images first, then keep the first n only
    # (no space to display more anyway)
    display_order = tf.concat([miss_indices, hit_indices], 0)
    display_order = tf.slice(display_order, [0], [n])

    # the n digits that will actually be displayed
    shown_images = tf.gather(X, display_order)
    shown_computed = tf.gather(Y, display_order)
    shown_correct = tf.gather(Y_, display_order)
    shown_is_correct = tf.gather(correct_prediction, display_order)

    # correct digits to be printed on the images
    left_glyphs = tf.image.grayscale_to_rgb(tensorflowvisu_digits.digits_left())
    correct_tags = tf.gather(left_glyphs, tf.argmax(shown_correct, 1))
    # computed digits to be printed on the images
    right_glyphs = tf.image.grayscale_to_rgb(tensorflowvisu_digits.digits_right())
    computed_tags = tf.gather(right_glyphs, tf.argmax(shown_computed, 1))
    # only print the correct and computed digits on unrecognised images
    overlay = tf.where(
        shown_is_correct,
        tf.zeros_like(correct_tags),
        correct_tags+computed_tags)

    white_bkg = tf.reshape(tf.tile([1.3,1.3,1.3], [28*28]), [1, 28,28,3])
    red_bkg = tf.reshape(tf.tile([1.3,1.0,1.0], [28*28]), [1, 28,28,3])
    # row 0 = red (incorrect), row 1 = white (correct); pick per image
    background = tf.gather(
        tf.concat([red_bkg, white_bkg], 0),
        tf.cast(shown_is_correct, tf.int32))

    # stencil extra data on top of images
    tile = tf.image.grayscale_to_rgb(shown_images)
    tile = ((1-(tile+overlay))*background)/1.3
    tile = tf.image.convert_image_dtype(tile, tf.uint8, saturate=True)

    # e.g. 100 images => 10x10 image block: each row is a horizontal strip
    rows = [
        tf.concat(
            tf.unstack(
                tf.slice(tile, [row*n//lines,0,0,0], [n//lines,28,28,3])),
            1)
        for row in range(lines)
    ]
    return tf.concat(rows, 0)
def kl_divergence(distribution_a, distribution_b, allow_nan_stats=True,
                  name=None):
    """Get the KL-divergence KL(distribution_a || distribution_b).

    If there is no KL method registered specifically for `type(distribution_a)`
    and `type(distribution_b)`, then the class hierarchies of these types are
    searched.

    If one KL method is registered between any pairs of classes in these two
    parent hierarchies, it is used.

    If more than one such registered method exists, the method whose registered
    classes have the shortest sum MRO paths to the input types is used.

    If more than one such shortest path exists, the first method
    identified in the search is used (favoring a shorter MRO distance to
    `type(distribution_a)`).

    If no method is found in this registry, the call is delegated to
    `tf.distributions.kl_divergence` with the same `allow_nan_stats` and
    `name`.

    Args:
      distribution_a: The first distribution.
      distribution_b: The second distribution.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Returns:
      A Tensor with the batchwise KL-divergence between `distribution_a`
      and `distribution_b`.

    Raises:
      NotImplementedError: If no KL method is defined for distribution types
        of `distribution_a` and `distribution_b`.
    """
    kl_fn = _registered_kl(type(distribution_a), type(distribution_b))
    if kl_fn is None:
        # TODO(b/117098119): For backwards compatibility, we check TF's
        # registry as well. This typically happens when this function is
        # called on a pair of TF's distributions.
        with deprecation.silence():
            # Forward the caller's options; otherwise `allow_nan_stats=False`
            # and `name` would be silently ignored on this path.
            return tf.distributions.kl_divergence(
                distribution_a, distribution_b,
                allow_nan_stats=allow_nan_stats, name=name)

    with tf.name_scope("KullbackLeibler"):
        kl_t = kl_fn(distribution_a, distribution_b, name=name)
        if allow_nan_stats:
            return kl_t

        # Check KL for NaNs
        kl_t = tf.identity(kl_t, name="kl")

        with tf.control_dependencies([
            tf.Assert(
                tf.logical_not(
                    tf.reduce_any(tf.is_nan(kl_t))),
                ["KL calculation between %s and %s returned NaN values "
                 "(and was called with allow_nan_stats=False). Values:"
                 % (distribution_a.name, distribution_b.name), kl_t])]):
            return tf.identity(kl_t, name="checked_kl")
def _not(self, x, use_gpu=False):
    """Assert that tf.logical_not(x) matches np.logical_not(x).

    Checks the result dtype (tf.bool), the values and the shape.
    """
    expected = np.logical_not(x)
    with self.test_session(use_gpu=use_gpu):
        negated = tf.logical_not(tf.convert_to_tensor(x))
        actual = negated.eval()
    self.assertEqual(negated.dtype, tf.bool)
    self.assertAllEqual(expected, actual)
    self.assertShapeEqual(expected, negated)
def lamb_func(logit, logic, lamb):
    """Return (left, right) statistics of `logit` split by the mask `logic`.

    left:  mean(neg * exp(neg * lamb)) / mean(exp(neg * lamb)), where `neg`
           are the logits at positions where `logic` is False.
    right: mean of the logits at positions where `logic` is True.
    """
    selected = tf.boolean_mask(logit, logic)
    rejected = tf.boolean_mask(logit, tf.logical_not(logic))

    rejected_weight = tf.exp(rejected * lamb)
    normaliser = tf.reduce_mean(rejected_weight)
    weighted_mean = tf.reduce_mean(rejected * rejected_weight)

    left = tf.truediv(weighted_mean, normaliser)
    right = tf.reduce_mean(selected)
    return left, right
def buildModel(self, inputShape, inMatFilename):
    """Build the network graph, loss, optimizers, metrics and summaries.

    The network is five convolutional layers followed by three fully
    connected layers ending in a 2-way softmax (`self.est`).

    Args:
        inputShape: indexable; entries 0, 1 and 2 give the non-batch
            dimensions of the input placeholder. Entries 0 and 1 are divided
            by 32 to size the first fully connected layer.
        inMatFilename: if truthy, it is passed to `loadWeights` and the
            convolution weights/biases are taken from the result under the
            keys "conv<k>_w" / "conv<k>_b"; otherwise they are initialised
            to zeros. The fully connected layers are never loaded from it.

    Side effects:
        Sets the layer tensors, placeholders (`inputImage`, `gt`,
        `keep_prob`), `loss`, `optimizerAll`, `optimizerFC`, the metric
        tensors (`tp`, `tn`, `fp`, `fn`, `accuracy`, `precision`, `recall`)
        and `saver` as attributes on `self`, and registers summaries.
    """
    if (inMatFilename):
        npWeights = loadWeights(inMatFilename)

    #Running on GPU
    with tf.device('gpu:0'):
        with tf.name_scope("inputOps"):
            #Get convolution variables as placeholders
            self.inputImage = node_variable(
                [None, inputShape[0], inputShape[1], inputShape[2]],
                "inputImage")
            self.gt = node_variable([None, 2], "gt")

        #Model variables for convolutions
        with tf.name_scope("Conv1Ops"):
            if (inMatFilename):
                self.W_conv1 = weight_variable_fromnp(
                    npWeights["conv1_w"], "w_conv1")
                self.B_conv1 = weight_variable_fromnp(
                    npWeights["conv1_b"], "b_conv1")
            else:
                self.W_conv1 = weight_variable_fromnp(
                    np.zeros((11, 11, 3, 64), dtype=np.float32), "w_conv1")
                self.B_conv1 = weight_variable_fromnp(
                    np.zeros((64), dtype=np.float32), "b_conv1")
            self.h_conv1 = tf.nn.relu(
                conv2d(self.inputImage, self.W_conv1, "conv1",
                       stride=[1, 4, 4, 1]) + self.B_conv1)
            self.h_norm1 = tf.nn.local_response_normalization(
                self.h_conv1, name="LRN1")
            self.h_pool1 = maxpool_2x2(self.h_norm1, "pool1")

        with tf.name_scope("Conv2Ops"):
            if (inMatFilename):
                self.W_conv2 = weight_variable_fromnp(
                    npWeights["conv2_w"], "w_conv2")
                self.B_conv2 = weight_variable_fromnp(
                    npWeights["conv2_b"], "b_conv2")
            else:
                self.W_conv2 = weight_variable_fromnp(
                    np.zeros((5, 5, 64, 256), dtype=np.float32), "w_conv2")
                self.B_conv2 = weight_variable_fromnp(
                    np.zeros((256), dtype=np.float32), "b_conv2")
            self.h_conv2 = tf.nn.relu(
                conv2d(self.h_pool1, self.W_conv2, "conv2") + self.B_conv2)
            self.h_norm2 = tf.nn.local_response_normalization(
                self.h_conv2, name="LRN2")
            self.h_pool2 = maxpool_2x2(self.h_norm2, "pool2")

        with tf.name_scope("Conv3Ops"):
            if (inMatFilename):
                self.W_conv3 = weight_variable_fromnp(
                    npWeights["conv3_w"], "w_conv3")
                self.B_conv3 = weight_variable_fromnp(
                    npWeights["conv3_b"], "b_conv3")
            else:
                self.W_conv3 = weight_variable_fromnp(
                    np.zeros((3, 3, 256, 256), dtype=np.float32), "w_conv3")
                self.B_conv3 = weight_variable_fromnp(
                    np.zeros((256), dtype=np.float32), "b_conv3")
            self.h_conv3 = tf.nn.relu(
                conv2d(self.h_pool2, self.W_conv3, "conv3") + self.B_conv3,
                name="relu3")

        with tf.name_scope("Conv4Ops"):
            if (inMatFilename):
                self.W_conv4 = weight_variable_fromnp(
                    npWeights["conv4_w"], "w_conv4")
                self.B_conv4 = weight_variable_fromnp(
                    npWeights["conv4_b"], "b_conv4")
            else:
                self.W_conv4 = weight_variable_fromnp(
                    np.zeros((3, 3, 256, 256), dtype=np.float32), "w_conv4")
                self.B_conv4 = weight_variable_fromnp(
                    np.zeros((256), dtype=np.float32), "b_conv4")
            self.h_conv4 = tf.nn.relu(
                conv2d(self.h_conv3, self.W_conv4, "conv4") + self.B_conv4,
                name="relu4")

        with tf.name_scope("Conv5Ops"):
            if (inMatFilename):
                self.W_conv5 = weight_variable_fromnp(
                    npWeights["conv5_w"], "w_conv5")
                self.B_conv5 = weight_variable_fromnp(
                    npWeights["conv5_b"], "b_conv5")
            else:
                self.W_conv5 = weight_variable_fromnp(
                    np.zeros((3, 3, 256, 256), dtype=np.float32), "w_conv5")
                self.B_conv5 = weight_variable_fromnp(
                    np.zeros((256), dtype=np.float32), "b_conv5")
            self.h_conv5 = tf.nn.relu(
                conv2d(self.h_conv4, self.W_conv5, "conv5") + self.B_conv5)
            self.h_pool5 = maxpool_2x2(self.h_conv5, "pool5")

        #placeholder for specifying dropout
        self.keep_prob = tf.placeholder(tf.float32)

        #32 comes from 4 stride in conv1, 2 stride in pool1, 2 stride in pool2, 2 stride in pool5
        # Floor division keeps numInputs an int: with `/` Python 3 would
        # produce a float, which is not a valid dimension for the weight
        # shape and the reshape below.
        numInputs = (inputShape[0] // 32) * (inputShape[1] // 32) * 256

        # The fully connected layers are always freshly initialised; they are
        # not read from inMatFilename.
        with tf.name_scope("FC1"):
            self.W_fc1 = weight_variable_xavier([numInputs, 4096], "w_fc1")
            self.B_fc1 = bias_variable([4096], "b_fc1")
            h_pool5_flat = tf.reshape(
                self.h_pool5, [-1, numInputs], name="pool5_flat")
            self.h_fc1 = tf.nn.relu(
                tf.matmul(h_pool5_flat, self.W_fc1, name="fc1") + self.B_fc1,
                "fc1_relu")
            self.h_fc1_drop = tf.nn.dropout(self.h_fc1, self.keep_prob)

        with tf.name_scope("FC2"):
            self.W_fc2 = weight_variable_xavier([4096, 4096], "w_fc2")
            self.B_fc2 = bias_variable([4096], "b_fc2")
            self.h_fc2 = tf.nn.relu(
                tf.matmul(self.h_fc1_drop, self.W_fc2, name="fc2")
                + self.B_fc2,
                "fc2_relu")
            self.h_fc2_drop = tf.nn.dropout(self.h_fc2, self.keep_prob)

        # fc3 has 2 output channels and a softmax activation.
        with tf.name_scope("FC3"):
            self.W_fc3 = weight_variable_xavier([4096, 2], "w_fc3")
            self.B_fc3 = bias_variable([2], "b_fc3")
            self.est = tf.nn.softmax(
                tf.matmul(self.h_fc2_drop, self.W_fc3, name="fc3")
                + self.B_fc3,
                "fc3_softmax")

        with tf.name_scope("Loss"):
            #Define loss
            # Cross entropy with column 1 weighted .8 and column 0
            # weighted .2.
            self.loss = tf.reduce_mean(
                -(self.gt[:, 1] * .8 * tf.log(self.est[:, 1])
                  + self.gt[:, 0] * .2 * tf.log(self.est[:, 0])))

        with tf.name_scope("Opt"):
            #Define optimizer
            # optimizerAll updates every trainable variable, optimizerFC only
            # the fully connected layers.
            self.optimizerAll = tf.train.AdamOptimizer(
                self.learningRate).minimize(self.loss)
            self.optimizerFC = tf.train.AdamOptimizer(
                self.learningRate).minimize(self.loss, var_list=[
                    self.W_fc1, self.B_fc1,
                    self.W_fc2, self.B_fc2,
                    self.W_fc3, self.B_fc3
                ])

        with tf.name_scope("Metric"):
            self.gtIdx = tf.argmax(self.gt, 1)
            self.estIdx = tf.argmax(self.est, 1)
            boolGtIdx = tf.cast(self.gtIdx, tf.bool)
            boolEstIdx = tf.cast(self.estIdx, tf.bool)

            #Logical and for true positive
            lAnd = tf.logical_and(boolGtIdx, boolEstIdx)
            self.tp = tf.reduce_sum(tf.cast(lAnd, tf.float32))
            #Logical nor for true negatives
            lNor = tf.logical_not(tf.logical_or(boolGtIdx, boolEstIdx))
            self.tn = tf.reduce_sum(tf.cast(lNor, tf.float32))

            #Subtraction and comparison for others
            # gt - est == 1 means gt=1, est=0 (false negative);
            # gt - est == -1 means gt=0, est=1 (false positive).
            lSub = self.gtIdx - self.estIdx
            Ones = tf.cast(tf.ones(tf.shape(lSub)), tf.int64)
            self.fn = tf.reduce_sum(
                tf.cast(tf.equal(lSub, Ones), tf.float32))
            self.fp = tf.reduce_sum(
                tf.cast(tf.equal(lSub, -Ones), tf.float32))

            #Accuracy, precision, and recall calculations
            self.accuracy = (self.tp + self.tn) / (
                self.tp + self.tn + self.fp + self.fn)
            self.precision = self.tp / (self.tp + self.fp)
            self.recall = self.tp / (self.tp + self.fn)

    #Summaries
    tf.scalar_summary('loss', self.loss, name="lossSum")
    tf.scalar_summary('accuracy', self.accuracy, name="accSum")
    tf.scalar_summary('precision', self.precision, name="precSum")
    tf.scalar_summary('recall', self.recall, name="recallSum")
    tf.scalar_summary('tp', self.tp, name="tp")
    tf.scalar_summary('fp', self.fp, name="fp")
    tf.scalar_summary('tn', self.tn, name="tn")
    tf.scalar_summary('fn', self.fn, name="fn")

    tf.histogram_summary('input', self.inputImage, name="image")
    tf.histogram_summary('gt', self.gt, name="gt")
    tf.histogram_summary('conv1', self.h_pool1, name="conv1")
    tf.histogram_summary('conv2', self.h_pool2, name="conv2")
    tf.histogram_summary('conv3', self.h_conv3, name="conv3")
    tf.histogram_summary('conv4', self.h_conv4, name="conv4")
    tf.histogram_summary('conv5', self.h_pool5, name="conv5")
    tf.histogram_summary('fc1', self.h_fc1, name="fc1")
    tf.histogram_summary('fc2', self.h_fc2, name="fc2")
    tf.histogram_summary('est', self.est, name="fc3")
    tf.histogram_summary('w_conv1', self.W_conv1, name="w_conv1")
    tf.histogram_summary('b_conv1', self.B_conv1, name="b_conv1")
    tf.histogram_summary('w_conv2', self.W_conv2, name="w_conv2")
    tf.histogram_summary('b_conv2', self.B_conv2, name="b_conv2")
    tf.histogram_summary('w_conv3', self.W_conv3, name="w_conv3")
    tf.histogram_summary('b_conv3', self.B_conv3, name="b_conv3")
    tf.histogram_summary('w_conv4', self.W_conv4, name="w_conv4")
    tf.histogram_summary('b_conv4', self.B_conv4, name="b_conv4")
    tf.histogram_summary('w_conv5', self.W_conv5, name="w_conv5")
    tf.histogram_summary('b_conv5', self.B_conv5, name="b_conv5")
    tf.histogram_summary('w_fc1', self.W_fc1, name="w_fc1")
    tf.histogram_summary('b_fc1', self.B_fc1, name="b_fc1")
    tf.histogram_summary('w_fc2', self.W_fc2, name="w_fc2")
    tf.histogram_summary('b_fc2', self.B_fc2, name="b_fc2")
    tf.histogram_summary('w_fc3', self.W_fc3, name="w_fc3")
    tf.histogram_summary('b_fc3', self.B_fc3, name="b_fc3")

    #Define saver
    self.saver = tf.train.Saver()
def ssd_decode_and_crop(image_buffer, boxes, classes, raw_shape):
    """Crop image randomly and decode the cropped region.

    This function will crop an image to meet the following requirements:
    1. height to width ratio between 0.5 and 2;
    2. IoUs of some boxes exceed specified threshold;
    3. At least one box center is in the cropped region.
    We defer the jpeg decoding task until after the crop to avoid wasted work.

    Crop proposals are drawn repeatedly inside a `tf.while_loop` until one is
    accepted. Each pass either accepts the whole image (with probability
    `ssd_constants.P_NO_CROP_PER_PASS`) or draws
    `ssd_constants.NUM_CROP_PASSES` random candidate windows and accepts the
    highest-indexed valid one, if any.

    Reference: https://github.com/chauhan-utk/ssd.DomainAdaptation

    Args:
      image_buffer: Tensor tf.string containing the contents of a JPEG file.
      boxes: Tensor tf.float32 of shape [num_boxes, 4], containing coordinates
        of object bounding boxes.
      classes: Tensor tf.int64 of shape [num_boxes, 1], containing class labels
        of objects.
      raw_shape: [height, width, 3].

    Returns:
      resized_image: decoded, cropped, and resized image Tensor tf.float32 of
        shape [ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE, 3], value
        range 0--255.
      cropped_boxes: box coordinates for objects in the cropped region.
      cropped_classes: class labels for objects in the cropped region.
    """
    num_boxes = tf.shape(boxes)[0]

    def no_crop_check():
        # True with probability P_NO_CROP_PER_PASS: skip cropping this pass.
        return (tf.random_uniform(
            shape=(), minval=0, maxval=1, dtype=tf.float32)
                < ssd_constants.P_NO_CROP_PER_PASS)

    def no_crop_proposal():
        # Accepted proposal covering the whole image and keeping every box.
        return (
            tf.ones((), tf.bool),
            tf.convert_to_tensor([0, 0, 1, 1], dtype=tf.float32),
            tf.ones((num_boxes, ), tf.bool),
        )

    def crop_proposal():
        # Draws NUM_CROP_PASSES candidate windows at once; every rand_vec
        # result has shape (NUM_CROP_PASSES, 1).
        rand_vec = lambda minval, maxval: tf.random_uniform(shape=(
            ssd_constants.NUM_CROP_PASSES, 1), minval=minval, maxval=maxval,
                                                            dtype=tf.float32)

        width, height = rand_vec(0.3, 1), rand_vec(0.3, 1)
        left, top = rand_vec(0, 1 - width), rand_vec(0, 1 - height)

        right = left + width
        bottom = top + height

        ltrb = tf.concat([left, top, right, bottom], axis=1)

        min_iou = tf.random_shuffle(ssd_constants.CROP_MIN_IOU_CHOICES)[0]
        ious = calc_iou_tensor(ltrb, boxes)

        # discard any bboxes whose center not in the cropped image
        # xc is built from box columns 0 and 2, yc from columns 1 and 3.
        xc, yc = [
            tf.tile(0.5 * (boxes[:, i + 0] + boxes[:, i + 2])[tf.newaxis, :],
                    (ssd_constants.NUM_CROP_PASSES, 1)) for i in range(2)
        ]

        # masks[p, b] is True when the center of box b lies strictly inside
        # candidate window p.
        masks = tf.reduce_all(tf.stack([
            tf.greater(xc, tf.tile(left, (1, num_boxes))),
            tf.less(xc, tf.tile(right, (1, num_boxes))),
            tf.greater(yc, tf.tile(top, (1, num_boxes))),
            tf.less(yc, tf.tile(bottom, (1, num_boxes))),
        ], axis=2), axis=2)

        # Checks of whether a crop is valid.
        valid_aspect = tf.logical_and(tf.less(height / width, 2),
                                      tf.less(width / height, 2))
        # NOTE(review): reduce_all requires EVERY box's IoU to exceed min_iou,
        # while the docstring says "some boxes" -- confirm which is intended.
        valid_ious = tf.reduce_all(tf.greater(ious, min_iou), axis=1,
                                   keepdims=True)
        valid_masks = tf.reduce_any(masks, axis=1, keepdims=True)

        valid_all = tf.cast(
            tf.reduce_all(
                tf.concat([valid_aspect, valid_ious, valid_masks], axis=1),
                axis=1),
            tf.int32)

        # One indexed, as zero is needed for the case of no matches.
        index = tf.range(1, 1 + ssd_constants.NUM_CROP_PASSES, dtype=tf.int32)

        # Either one-hot, or zeros if there is no valid crop.
        # The max of index * valid_all picks the highest-indexed valid
        # candidate; when none is valid the max is 0, which matches no index.
        selection = tf.equal(tf.reduce_max(index * valid_all), index)

        use_crop = tf.reduce_any(selection)
        # Masked sums/any over the candidate axis extract the selected window
        # and its box mask (all zeros / all False when nothing was selected).
        output_ltrb = tf.reduce_sum(tf.multiply(
            ltrb, tf.tile(tf.cast(selection, tf.float32)[:, tf.newaxis],
                          (1, 4))), axis=0)
        output_masks = tf.reduce_any(tf.logical_and(
            masks, tf.tile(selection[:, tf.newaxis], (1, num_boxes))), axis=0)

        return use_crop, output_ltrb, output_masks

    def proposal(*args):
        # Loop body: the previous loop variables are ignored; each pass
        # produces a fresh (accepted, window, box_mask) triple.
        return tf.cond(
            pred=no_crop_check(),
            true_fn=no_crop_proposal,
            false_fn=crop_proposal,
        )

    # Keep proposing until the first loop variable (accepted flag) is True.
    _, crop_bounds, box_masks = tf.while_loop(
        cond=lambda x, *_: tf.logical_not(x),
        body=proposal,
        loop_vars=[
            tf.zeros((), tf.bool),
            tf.zeros((4, ), tf.float32),
            tf.zeros((num_boxes, ), tf.bool)
        ],
    )

    filtered_boxes = tf.boolean_mask(boxes, box_masks, axis=0)

    # Clip boxes to the cropped region.
    filtered_boxes = tf.stack([
        tf.maximum(filtered_boxes[:, 0], crop_bounds[0]),
        tf.maximum(filtered_boxes[:, 1], crop_bounds[1]),
        tf.minimum(filtered_boxes[:, 2], crop_bounds[2]),
        tf.minimum(filtered_boxes[:, 3], crop_bounds[3]),
    ], axis=1)

    left = crop_bounds[0]
    top = crop_bounds[1]
    width = crop_bounds[2] - left
    height = crop_bounds[3] - top

    # Re-express the clipped boxes relative to the crop window.
    cropped_boxes = tf.stack([
        (filtered_boxes[:, 0] - left) / width,
        (filtered_boxes[:, 1] - top) / height,
        (filtered_boxes[:, 2] - left) / width,
        (filtered_boxes[:, 3] - top) / height,
    ], axis=1)

    # crop_window containing integer coordinates of cropped region. A
    # normalized coordinate value of y should be mapped to the image
    # coordinate at y * (height - 1).
    # NOTE(review): `left`/`width` are scaled by raw_shape[0], documented
    # above as the image HEIGHT, and `top`/`height` by raw_shape[1]. This is
    # consistent only if box column 0 is really the vertical coordinate
    # (e.g. boxes in [ymin, xmin, ymax, xmax] order) -- confirm against the
    # caller and the crop_window layout expected by decode_and_crop_jpeg.
    raw_shape = tf.cast(raw_shape, tf.float32)
    crop_window = tf.stack([
        left * (raw_shape[0] - 1),
        top * (raw_shape[1] - 1),
        width * raw_shape[0],
        height * raw_shape[1]
    ])
    crop_window = tf.cast(crop_window, tf.int32)

    # Fused op only decodes the cropped portion of an image
    cropped_image = tf.image.decode_and_crop_jpeg(image_buffer, crop_window,
                                                  channels=3)

    # Resize converts image dtype from uint8 to float32, without rescaling
    # values.
    resized_image = tf.image.resize_images(
        cropped_image, [ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE])

    cropped_classes = tf.boolean_mask(classes, box_masks, axis=0)

    return resized_image, cropped_boxes, cropped_classes
def condition(time, unused_outputs_ta, unused_state, unused_inputs, finished):
    """Loop predicate: True while at least one entry of `finished` is False.

    Only `finished` is inspected; the other arguments are ignored.
    """
    everything_finished = tf.reduce_all(finished)
    return tf.logical_not(everything_finished)
def connect_data_and_network(self,
                             outputs_collector=None,
                             gradients_collector=None):
    """Wire the sampler output into the network and register outputs.

    Training: pops a batch (from the training or the validation sampler),
    feeds random noise, the images and the conditioning to `self.net`,
    computes the generator/discriminator losses, registers them for console
    and tensorboard output, and hands the generator and discriminator
    gradients to `gradients_collector`.

    Inference: runs `self.net` on the sampled vector with a zero dummy image,
    registers the generated image and its location as network outputs, and
    creates `self.output_decoder`.

    Args:
        outputs_collector: collector whose `add_to_collection` is called in
            both modes; despite the `None` default it must be provided.
        gradients_collector: collector receiving the gradients; only used
            when training.
    """
    if self.is_training:
        self.patience = self.action_param.patience

        def switch_sampler(for_training):
            # First sampler for training, last one for validation.
            with tf.name_scope('train' if for_training else 'validation'):
                sampler = self.get_sampler()[0][0 if for_training else -1]
                return sampler.pop_batch_op()

        if self.action_param.validation_every_n > 0:
            data_dict = tf.cond(tf.logical_not(self.is_validation),
                                lambda: switch_sampler(for_training=True),
                                lambda: switch_sampler(for_training=False))
        else:
            data_dict = switch_sampler(for_training=True)

        images = tf.cast(data_dict['image'], tf.float32)
        noise_shape = [self.net_param.batch_size,
                       self.gan_param.noise_size]
        noise = tf.random_normal(shape=noise_shape,
                                 mean=0.0,
                                 stddev=1.0,
                                 dtype=tf.float32)
        conditioning = data_dict['conditioning']
        net_output = self.net(
            noise, images, conditioning, self.is_training)

        loss_func = LossFunction(
            loss_type=self.action_param.loss_type)
        real_logits = net_output[1]
        fake_logits = net_output[2]
        lossG, lossD = loss_func(real_logits, fake_logits)
        if self.net_param.decay > 0:
            reg_losses = tf.get_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES)
            if reg_losses:
                # The same mean regularisation term is added to both losses.
                reg_loss = tf.reduce_mean(
                    [tf.reduce_mean(l_reg) for l_reg in reg_losses])
                lossD = lossD + reg_loss
                lossG = lossG + reg_loss

        self.total_loss = lossD + lossG

        outputs_collector.add_to_collection(
            var=self.total_loss, name='total_loss',
            average_over_devices=True, collection=CONSOLE)
        outputs_collector.add_to_collection(
            var=self.total_loss, name='total_loss',
            average_over_devices=True, summary_type='scalar',
            collection=TF_SUMMARIES)
        # variables to display in STDOUT
        outputs_collector.add_to_collection(
            var=lossD, name='lossD', average_over_devices=True,
            collection=CONSOLE)
        outputs_collector.add_to_collection(
            var=lossG, name='lossG', average_over_devices=False,
            collection=CONSOLE)
        # variables to display in tensorboard
        outputs_collector.add_to_collection(
            var=lossG, name='lossG', average_over_devices=False,
            collection=TF_SUMMARIES)
        # The summary named 'lossD' must track the discriminator loss; it
        # was previously registered with the generator loss by mistake.
        outputs_collector.add_to_collection(
            var=lossD, name='lossD', average_over_devices=True,
            collection=TF_SUMMARIES)

        with tf.name_scope('Optimiser'):
            optimiser_class = OptimiserFactory.create(
                name=self.action_param.optimiser)
            self.optimiser = optimiser_class.get_instance(
                learning_rate=self.action_param.lr)

        with tf.name_scope('ComputeGradients'):
            # gradients of generator
            generator_variables = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
            generator_grads = self.optimiser.compute_gradients(
                lossG,
                var_list=generator_variables,
                colocate_gradients_with_ops=True)

            # gradients of discriminator
            discriminator_variables = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator')
            discriminator_grads = self.optimiser.compute_gradients(
                lossD,
                var_list=discriminator_variables,
                colocate_gradients_with_ops=True)
            grads = [generator_grads, discriminator_grads]

            # add the grads back to application_driver's training_grads
            gradients_collector.add_to_collection(grads)
    else:
        data_dict = self.get_sampler()[0][0].pop_batch_op()
        conditioning_dict = self.get_sampler()[1][0].pop_batch_op()
        conditioning = conditioning_dict['conditioning']
        # Dummy image: conditioning's shape with the last dimension
        # replaced by 1.
        image_size = conditioning.shape.as_list()[:-1]
        dummy_image = tf.zeros(image_size + [1])
        net_output = self.net(data_dict['vector'],
                              dummy_image,
                              conditioning,
                              self.is_training)
        outputs_collector.add_to_collection(
            var=net_output[0],
            name='image',
            average_over_devices=False,
            collection=NETWORK_OUTPUT)
        outputs_collector.add_to_collection(
            var=conditioning_dict['conditioning_location'],
            name='location',
            average_over_devices=False,
            collection=NETWORK_OUTPUT)

        self.output_decoder = WindowAsImageAggregator(
            image_reader=self.readers[0],
            output_path=self.action_param.save_seg_dir)
def get_geometric_idxs(atoms, adjacency_map):
    """ Find the bond, angles, and torsion indices in a molecular graph or
    graphs.

    Bonds are the positive entries of `adjacency_map`. Angles are built per
    atom from pairs of its neighbours, with the atom in the middle column.
    Torsions are built per bond from one neighbour of each bonded atom, with
    the two bonded atoms in the middle columns.

    Parameters
    ----------
    atoms : tf.Tensor, dtype=tf.int64,
        a tensor denoting the sequence of type of atoms
        (only its first dimension, the atom count, is read here)
    adjacency_map : tf.Tensor, dtype=tf.int64,
        upper triangular tensor representing the adjacency map of the molecules
        NOTE(review): it is compared against float32 constants below, so the
        actual dtype is presumably float32 -- confirm.

    Returns
    -------
    bond_idxs : int64 tensor with 2 columns, one row per bond
    angle_idxs : int64 tensor with 3 columns, one row per angle
    torsion_idxs : int64 tensor with 4 columns, one row per torsion

    """
    # get the attributes of the molecule
    # adjacency_map = mol[1]
    # atoms = mol[0]
    # symmetrise the upper triangular map so that each row lists all
    # neighbours of the corresponding atom
    adjacency_map_full = adjacency_map \
        + tf.transpose(adjacency_map)

    n_atoms = tf.cast(tf.shape(atoms)[0], tf.int64)

    # (n_atoms, n_atoms, 2)
    # NOTE(review): tf.meshgrid defaults to indexing='xy', so the pair stored
    # at position [i, j] is presumably (j, i) -- confirm the column order
    # callers expect for bond_idxs.
    all_idxs_stack = tf.stack(
        tf.meshgrid(
            tf.range(n_atoms, dtype=tf.int64),
            tf.range(n_atoms, dtype=tf.int64)),
        axis=2)

    # (n_atoms, n_atoms, 2)
    # boolean
    is_bond = tf.greater(
        adjacency_map,
        tf.constant(0, dtype=tf.float32))

    # (n_bonds, 2)
    bond_idxs = tf.boolean_mask(
        all_idxs_stack,
        is_bond)

    # not used again in this function
    n_bonds = tf.cast(tf.shape(bond_idxs)[0], tf.int64)

    # init the angles idxs to be all negative ones
    # (placeholder first row so the loop can concat; removed after the loop)
    angle_idxs = tf.constant([[-1, -1, -1]], dtype=tf.int64)

    @tf.function
    def process_one_atom_if_there_is_angle(idx, angle_idxs,
            adjacency_map_full=adjacency_map_full):
        # Appends one row (neighbour, idx, other neighbour) per pair of
        # distinct neighbours of atom `idx`.

        # get all the connection indices
        connection_idxs = tf.where(
            tf.greater(
                adjacency_map_full[idx, :],
                tf.constant(0, dtype=tf.float32)))

        # get the number of connections
        n_connections = tf.shape(connection_idxs)[0]

        # get the combinations from these connection indices
        # (only the upper triangle, diagonal included, of the pair grid)
        connection_combinations = tf.gather_nd(
            tf.stack(
                tf.meshgrid(
                    connection_idxs,
                    connection_idxs),
                axis=2),
            tf.where(
                tf.greater(
                    tf.linalg.band_part(
                        tf.ones(
                            (
                                n_connections,
                                n_connections
                            ),
                            dtype=tf.int64),
                        0, -1),
                    tf.constant(0, dtype=tf.int64))))

        # keep only pairs whose first index is larger than the second; this
        # drops the pairs of a neighbour with itself
        connection_combinations = tf.boolean_mask(
            connection_combinations,
            tf.greater(
                connection_combinations[:, 0] \
                 - connection_combinations[:, 1],
                tf.constant(0, dtype=tf.int64)))

        angle_idxs = tf.concat(
            [
                angle_idxs,
                tf.concat(
                    [
                        tf.expand_dims(
                            connection_combinations[:, 0],
                            1),
                        tf.expand_dims(
                            idx * tf.ones(
                                (tf.shape(connection_combinations)[0], ),
                                dtype=tf.int64),
                            1),
                        tf.expand_dims(
                            connection_combinations[:, 1],
                            1)
                    ],
                    axis=1)
            ],
            axis=0)

        return idx + 1, angle_idxs

    @tf.function
    def process_one_atom(idx, angle_idxs,
            adjacency_map_full=adjacency_map_full):
        # Loop body: skip atoms that have no neighbour at all.
        if tf.less(
            tf.math.count_nonzero(adjacency_map_full[idx, :]),
            tf.constant(1, dtype=tf.int64)):
            return idx+1, angle_idxs

        else:
            return process_one_atom_if_there_is_angle(idx, angle_idxs)

    idx = tf.constant(0, dtype=tf.int64)
    # use while loop to update the indices forming the angles
    # (the row count of angle_idxs grows, hence the (None, 3) invariant)
    idx, angle_idxs = tf.while_loop(
        # condition
        lambda idx, angle_idxs: tf.less(idx, n_atoms),

        process_one_atom,

        [idx, angle_idxs],

        shape_invariants=[
            idx.get_shape(),
            tf.TensorShape((None, 3))])

    # discard the first row
    angle_idxs = angle_idxs[1:, ]

    # not used again in this function
    n_angles = tf.shape(angle_idxs, tf.int64)[0]

    # init the torsion idxs to be all negative ones
    # (placeholder first row, removed after the loop)
    torsion_idxs = tf.constant([[-1, -1, -1, -1]], dtype=tf.int64)

    # for each bond, there is at least one torsion terms associated
    def process_one_bond_if_there_is_torsion(idx, torsion_idxs):
        # Appends one row (left neighbour, bond[0], bond[1], right neighbour)
        # for every combination of a neighbour of bond[0] with a neighbour
        # of bond[1].
        bond = bond_idxs[idx]
        left_atom_connections = tf.where(
            tf.greater(
                adjacency_map_full[bond[0]],
                tf.constant(0, dtype=tf.float32)))

        right_atom_connections = tf.where(
            tf.greater(
                adjacency_map_full[bond[1]],
                tf.constant(0, dtype=tf.float32)))

        # get the combinations from these connection indices
        connection_combinations = tf.reshape(
            tf.stack(
                tf.meshgrid(
                    left_atom_connections,
                    right_atom_connections),
                axis=2),
            [-1, 2])

        torsion_idxs = tf.concat(
            [
                torsion_idxs,
                tf.concat(
                    [
                        tf.expand_dims(
                            connection_combinations[:, 0],
                            1),
                        bond[0] * tf.ones(
                            (tf.shape(connection_combinations)[0], 1),
                            dtype=tf.int64),
                        bond[1] * tf.ones(
                            (tf.shape(connection_combinations)[0], 1),
                            dtype=tf.int64),
                        tf.expand_dims(
                            connection_combinations[:, 1],
                            1)
                    ],
                    axis=1)
            ],
            axis=0)

        return idx + 1, torsion_idxs

    def process_one_bond(idx, torsion_idxs):
        # Loop body: a bond is skipped unless BOTH of its atoms have more
        # than one connection.
        if tf.logical_not(
            tf.logical_and(
                tf.greater(
                    tf.math.count_nonzero(
                        adjacency_map_full[bond_idxs[idx][0]]),
                    tf.constant(1, dtype=tf.int64)),
                tf.greater(
                    tf.math.count_nonzero(
                        adjacency_map_full[bond_idxs[idx][1]]),
                    tf.constant(1, dtype=tf.int64)))):
            return idx + 1, torsion_idxs

        else:
            return process_one_bond_if_there_is_torsion(
                idx, torsion_idxs)

    idx = tf.constant(0, dtype=tf.int64)
    idx, torsion_idxs = tf.while_loop(
        # condition
        lambda idx, _: tf.less(idx, tf.shape(bond_idxs, tf.int64)[0]),

        # body
        process_one_bond,

        # vars
        [idx, torsion_idxs],

        shape_invariants=[
            idx.get_shape(),
            tf.TensorShape([None, 4])
            ])

    # get rid of the first one
    torsion_idxs = torsion_idxs[1:, ]

    # drop rows where an outer atom coincides with the bonded atom on the
    # opposite side (column 0 == column 2, or column 1 == column 3)
    torsion_idxs = tf.boolean_mask(
        torsion_idxs,
        tf.logical_and(
            tf.logical_not(
                tf.equal(
                    torsion_idxs[:, 0] - torsion_idxs[:, 2],
                    tf.constant(0, dtype=tf.int64))),
            tf.logical_not(
                tf.equal(
                    torsion_idxs[:, 1] - torsion_idxs[:, 3],
                    tf.constant(0, dtype=tf.int64)))))

    return bond_idxs, angle_idxs, torsion_idxs
def prediction_incorrect(logits, label, topk=1, name='incorrect_vector'):
    """Return a float32 0/1 tensor named `name`, 1 where the prediction is wrong.

    A prediction counts as wrong when `label` is not among the `topk`
    highest-scoring entries of `logits`.
    """
    with tf.name_scope('prediction_incorrect'):
        label_in_top_k = tf.nn.in_top_k(logits, label, topk)
        label_missed = tf.logical_not(label_in_top_k)
    return tf.cast(label_missed, tf.float32, name=name)
def _call(
        self,
        atoms,  # NOTE: here there could be more than one mol
        adjacency_map,
        coordinates,
        atom_in_mol=False,  # boolean membership mask, see docstring
        batched_attr_in_mol=False,
        repeat=3):
    """More general __call__ method: message passing over a molecular graph.

    Hidden states are kept for atoms (`h_v`), bonds (`h_e`), angles (`h_a`),
    torsions (`h_t`) and whole molecules (`h_u`). They are initialised with
    `self.f_*`, updated `self.repeat` times with `self.phi_*` / `self.rho_*`
    and finally handed, together with their per-iteration histories, to
    `self.f_r`.

    Args:
        atoms: atom features; the first axis is the atom axis.
        adjacency_map: bond-order matrix; entries greater than zero are
            treated as bonds. Presumably upper triangular -- TODO confirm
            against `get_geometric_idxs`.
        coordinates: atom coordinates, forwarded to the geometry helpers and
            to `self.f_r`.
        atom_in_mol: boolean mask used as (n_atoms, n_mols). When nothing in
            it is True (e.g. the default `False`) every atom is assigned to
            a single molecule.
        batched_attr_in_mol: boolean mask forwarded to `self.f_u`. When
            nothing in it is True it is replaced by `[[True]]`.
        repeat: not read by this method; the number of propagation rounds
            is taken from `self.repeat`.

    Returns:
        Whatever `self.f_r` returns.
    """
    # boolean, same shape as `adjacency_map`
    is_bond = tf.greater(
        adjacency_map,
        tf.constant(0, dtype=tf.float32))

    bond_idxs, angle_idxs, torsion_idxs = get_geometric_idxs(
        atoms, adjacency_map)

    # get the dimensions of the indices
    n_atoms = tf.shape(atoms, tf.int64)[0]
    n_bonds = tf.shape(bond_idxs, tf.int64)[0]
    n_angles = tf.shape(angle_idxs, tf.int64)[0]
    n_torsions = tf.shape(torsion_idxs, tf.int64)[0]

    # grab atoms that are at the two ends of a bond
    # (n_bonds, )
    left_idxs = bond_idxs[:, 0]

    # (n_bonds, )
    right_idxs = bond_idxs[:, 1]

    if tf.logical_not(tf.reduce_any(atom_in_mol)):
        atom_in_mol = tf.tile(
            [[True]],
            [n_atoms, 1])

    if tf.logical_not(tf.reduce_any(batched_attr_in_mol)):
        batched_attr_in_mol = tf.constant([[True]])

    def _touches_atom(atom_column, n_rows):
        # (n_rows, n_atoms) boolean; entry [r, a] is True
        # where atom_column[r] == a
        return tf.equal(
            tf.tile(
                tf.expand_dims(
                    tf.range(n_atoms),
                    0),
                [n_rows, 1]),
            tf.tile(
                tf.expand_dims(
                    atom_column,
                    1),
                [1, n_atoms]))

    def _in_mol(is_connected_to_atoms, atom_mask):
        # (n_rows, n_mols) boolean; True where a row shares at least one
        # atom with the molecule (integer matmul of the two 0/1 masks)
        return tf.greater(
            tf.matmul(
                tf.where(
                    is_connected_to_atoms,
                    tf.ones_like(
                        is_connected_to_atoms,
                        tf.int64),
                    tf.zeros_like(
                        is_connected_to_atoms,
                        tf.int64)),
                tf.where(
                    atom_mask,
                    tf.ones_like(
                        atom_mask,
                        tf.int64),
                    tf.zeros_like(
                        atom_mask,
                        tf.int64))),
            tf.constant(0, dtype=tf.int64))

    # (n_bonds, n_atoms)
    bond_is_connected_to_atoms = tf.logical_or(
        _touches_atom(bond_idxs[:, 0], n_bonds),
        _touches_atom(bond_idxs[:, 1], n_bonds))

    # (n_atoms, n_bonds)
    atom_is_connected_to_bonds = tf.transpose(
        bond_is_connected_to_atoms)

    # (n_angles, n_atoms)
    angle_is_connected_to_atoms = tf.reduce_any(
        [
            _touches_atom(angle_idxs[:, 0], n_angles),
            _touches_atom(angle_idxs[:, 1], n_angles),
            _touches_atom(angle_idxs[:, 2], n_angles)
        ],
        axis=0)

    # (n_torsions, n_atoms)
    torsion_is_connected_to_atoms = tf.reduce_any(
        [
            _touches_atom(torsion_idxs[:, 0], n_torsions),
            _touches_atom(torsion_idxs[:, 1], n_torsions),
            _touches_atom(torsion_idxs[:, 2], n_torsions),
            _touches_atom(torsion_idxs[:, 3], n_torsions)
        ],
        axis=0)

    # (n_bonds, )
    # NOTE: here we use the same boolean mask as before, so they
    # should be following the same order
    bond_orders = tf.boolean_mask(
        adjacency_map,
        is_bond)

    # NOTE(review): the three geometric quantities below are not consumed
    # anywhere in this method (`self.f_r` receives `coordinates` directly).
    # They are kept because they call project helpers; confirm and drop.
    bond_distances = tf.boolean_mask(
        gin.deterministic.md.get_distance_matrix(coordinates),
        is_bond)

    angle_angles = gin.deterministic.md.get_angles(
        coordinates,
        angle_idxs)

    torsion_dihedrals = gin.deterministic.md.get_dihedrals(
        coordinates,
        torsion_idxs)

    # initialize the hidden layers
    # (n_bonds, ...)
    h_e = self.f_e(
        tf.expand_dims(bond_orders, 1))
    h_e_0 = h_e
    h_e_history = tf.expand_dims(h_e_0, 1)

    # (n_atoms, ...)
    h_v = self.f_v(atoms)
    h_v_0 = h_v
    h_v_history = tf.expand_dims(h_v_0, 1)

    # (n_angles, ...)
    # centre atom concatenated with the sum of the two side atoms.
    # The side term previously added column 1 (the centre) a second time
    # instead of column 2, which disagreed with the update step below.
    h_a = self.f_a(
        tf.concat(
            [
                tf.gather(
                    h_v,
                    angle_idxs[:, 1]),
                tf.math.add(
                    tf.gather(
                        h_v,
                        angle_idxs[:, 0]),
                    tf.gather(
                        h_v,
                        angle_idxs[:, 2]))
            ],
            axis=1))
    h_a_0 = h_a
    h_a_history = tf.expand_dims(h_a_0, 1)

    # (n_torsions, ...)
    # sum of the two end atoms concatenated with the sum of the two
    # central atoms
    h_t = self.f_t(
        tf.concat(
            [
                tf.math.add(
                    tf.gather(
                        h_v,
                        torsion_idxs[:, 0]),
                    tf.gather(
                        h_v,
                        torsion_idxs[:, 3])),
                tf.math.add(
                    tf.gather(
                        h_v,
                        torsion_idxs[:, 1]),
                    tf.gather(
                        h_v,
                        torsion_idxs[:, 2]))
            ],
            axis=1))
    h_t_0 = h_t
    h_t_history = tf.expand_dims(h_t_0, 1)

    # (n_mols, ...)
    # NOTE: here $h_u$ could have more than one first dimensions
    h_u = self.f_u(atoms, adjacency_map, batched_attr_in_mol)
    h_u_0 = h_u
    h_u_history = tf.expand_dims(h_u_0, 1)

    # specify what we know about the shape of the mask
    atom_in_mol.set_shape([None, None])

    # drop molecule columns that contain no atom at all
    atom_in_mol = tf.boolean_mask(
        atom_in_mol,
        tf.reduce_any(
            atom_in_mol,
            axis=0),
        axis=1)

    bond_in_mol = _in_mol(bond_is_connected_to_atoms, atom_in_mol)
    angle_in_mol = _in_mol(angle_is_connected_to_atoms, atom_in_mol)
    torsion_in_mol = _in_mol(torsion_is_connected_to_atoms, atom_in_mol)

    def propagate_one_time(
            iter_idx,
            h_v, h_e, h_a, h_t, h_u,
            h_v_history, h_e_history, h_a_history,
            h_t_history, h_u_history,
            atom_in_mol=atom_in_mol,  # (n_atoms, n_mols)
            bond_in_mol=bond_in_mol,  # (n_bonds, n_mols)
            angle_in_mol=angle_in_mol,
            torsion_in_mol=torsion_in_mol
        ):
        # update $ e'_k $
        # $$
        # e'_k = \phi^e (e_k, v_{rk}, v_{sk}, u)
        # $$
        h_left = tf.gather(
            h_v,
            left_idxs)

        h_right = tf.gather(
            h_v,
            right_idxs)

        h_left_right = h_left + h_right

        # (n_bonds, d_e)
        h_e = self.phi_e(h_e, h_e_0, h_left_right,
            tf.reduce_sum(
                tf.boolean_mask(
                    tf.tile(
                        tf.expand_dims(
                            h_u,  # (n_mols, d_u)
                            0),  # (1, n_mols, d_u)
                        [tf.shape(h_e)[0], 1, 1]),
                    bond_in_mol),
                axis=1,
                keepdims=True))

        h_e_history = tf.concat(
            [
                h_e_history,
                tf.expand_dims(
                    h_e,
                    1)
            ],
            axis=1)

        # aggregate $ \bar{e_i'} $
        # $$
        # \bar{e_i'} = \rho^{e \rightarrow v} (E'_i)
        # $$
        # (n_atoms, d_e)
        h_e_bar_i = self.rho_e_v(h_e, atom_is_connected_to_bonds)

        # update $ v'_i $
        # $$
        # v'_i = phi^v (\bar{e_i}, v_i, u)
        # $$
        # (n_atoms, d_v)
        h_v = self.phi_v(
            h_v,  # (n_atoms, d_v)
            h_v_0,  # (n_atoms, d_v)
            h_e_bar_i,  # (n_atoms, d_v)
            tf.reduce_sum(
                tf.where(
                    tf.tile(
                        tf.expand_dims(
                            atom_in_mol,
                            2),
                        [1, 1, tf.shape(h_u)[1]]),
                    tf.tile(
                        tf.expand_dims(
                            h_u,
                            0),
                        [n_atoms, 1, 1]),
                    tf.zeros_like(
                        tf.tile(
                            tf.expand_dims(
                                h_u,
                                0),
                            [n_atoms, 1, 1]))),
                axis=1))

        h_v_history = tf.concat(
            [
                h_v_history,
                tf.expand_dims(
                    h_v,
                    1)
            ],
            axis=1)

        # update the angle states from the freshly updated atoms
        h_v_center = tf.gather(
            h_v,
            angle_idxs[:, 1])

        h_v_sides = tf.math.add(
            tf.gather(
                h_v,
                angle_idxs[:, 0]),
            tf.gather(
                h_v,
                angle_idxs[:, 2]))

        h_a = self.phi_a(
            h_a,
            h_a_0,
            h_v_center,
            h_v_sides,
            tf.reduce_sum(
                tf.boolean_mask(
                    tf.tile(
                        tf.expand_dims(
                            h_u,  # (n_mols, d_u)
                            0),  # (1, n_mols, d_u)
                        [tf.shape(h_a)[0], 1, 1]),
                    angle_in_mol),
                axis=1,
                keepdims=True))

        h_a_history = tf.concat(
            [
                h_a_history,
                tf.expand_dims(h_a, 1)
            ],
            axis=1)

        # update the torsion states: the two central atoms ...
        h_v_center = tf.math.add(
            tf.gather(
                h_v,
                torsion_idxs[:, 1]),
            tf.gather(
                h_v,
                torsion_idxs[:, 2]))

        # ... and the two end atoms. The second end used to be read from
        # column 2, which counted a central atom twice and never used the
        # last atom of the torsion.
        h_v_sides = tf.math.add(
            tf.gather(
                h_v,
                torsion_idxs[:, 0]),
            tf.gather(
                h_v,
                torsion_idxs[:, 3]))

        h_t = self.phi_t(
            h_t,
            h_t_0,
            h_v_center,
            h_v_sides,
            tf.reduce_sum(
                tf.boolean_mask(
                    tf.tile(
                        tf.expand_dims(
                            h_u,  # (n_mols, d_u)
                            0),  # (1, n_mols, d_u)
                        [tf.shape(h_t)[0], 1, 1]),
                    torsion_in_mol),
                axis=1,
                keepdims=True))

        h_t_history = tf.concat(
            [
                h_t_history,
                tf.expand_dims(h_t, 1)
            ],
            axis=1)

        # aggregate $ \bar{e'} $
        # $$
        # \bar{e'} = \rho^{e \rightarrow u} (E')
        # $$
        # (n_mols, d_e)
        h_e_bar = self.rho_e_u(h_e, bond_in_mol)

        # aggregate $ \bar{v'} $
        # $$
        # \bar{v'} = \rho^{v \rightarrow u} (V')
        # $$
        # (n_mols, d_v)
        h_v_bar = self.rho_v_u(h_v, atom_in_mol)

        # aggregate $ \bar{a'} $
        h_a_bar = self.rho_a_u(h_a, angle_in_mol)

        # aggregate $ \bar{t} $
        h_t_bar = self.rho_t_u(h_t, torsion_in_mol)

        # update $ u' $
        # $$
        # u' = \phi^u (\bar{e'}, \bar{v'}, u)
        # $$
        # (n_mols, d_u)
        h_u = self.phi_u(
            h_u,
            h_u_0,
            h_e_bar,
            h_v_bar,
            h_a_bar,
            h_t_bar)

        h_u_history = tf.concat(
            [
                h_u_history,
                tf.expand_dims(
                    h_u,
                    1)
            ],
            axis=1)

        return (
            iter_idx + 1,
            h_v, h_e, h_a, h_t, h_u,
            h_v_history, h_e_history, h_a_history,
            h_t_history, h_u_history)

    # One call outside the loop whose result is discarded.
    # NOTE(review): presumably this forces the phi/rho layers to create
    # their variables before `tf.while_loop` traces the body -- confirm
    # before removing.
    propagate_one_time(
        0,
        h_v, h_e, h_a, h_t, h_u,
        h_v_history, h_e_history, h_a_history,
        h_t_history, h_u_history)

    # use while loop to execute the graph multiple times
    iter_idx = tf.constant(0, dtype=tf.int64)

    iter_idx, h_v, h_e, h_a, h_t, h_u, \
        h_v_history, h_e_history, h_a_history, \
        h_t_history, h_u_history \
        = tf.while_loop(
            # condition
            lambda \
                iter_idx, \
                h_v, h_e, h_a, h_t, h_u, \
                h_v_history, h_e_history, h_a_history, h_t_history, \
                h_u_history: \
                tf.less(iter_idx, self.repeat),

            # loop body
            propagate_one_time,

            # loop vars
            [
                iter_idx,
                h_v, h_e, h_a, h_t, h_u,
                h_v_history, h_e_history, h_a_history,
                h_t_history, h_u_history
            ],

            # shape_invariants: the histories grow along axis 1 on every
            # iteration, so they are declared fully unknown
            shape_invariants=[
                iter_idx.get_shape(),
                h_v.get_shape(),
                h_e.get_shape(),
                h_a.get_shape(),
                h_t.get_shape(),
                h_u.get_shape(),
                tf.TensorShape((None, None, None)),
                tf.TensorShape((None, None, None)),
                tf.TensorShape((None, None, None)),
                tf.TensorShape((None, None, None)),
                tf.TensorShape((None, None, None)),
            ])

    y_bar = self.f_r(
        h_v, h_e, h_a, h_t, h_u,
        h_v_history, h_e_history, h_a_history,
        h_t_history, h_u_history,
        atom_in_mol, bond_in_mol, angle_in_mol, torsion_in_mol,
        adjacency_map, coordinates)

    return y_bar
def bboxes_matching(label, scores, bboxes,
                    glabels, gbboxes, gdifficults,
                    matching_threshold=0.5, scope=None):
    """Match the detected boxes of one class against the groundtruth boxes.

    Does not accept batched inputs.

    The algorithm goes as follows: detections are visited in the order
    given. For each one, the groundtruth box of class `label` with the
    highest Jaccard overlap is selected. If the overlap is above
    `matching_threshold` and that groundtruth box has not been matched yet,
    the detection is a True Positive; otherwise it is a False Positive. If
    the selected groundtruth box is flagged as difficult, the detection is
    recorded as neither. We refer to the Pascal VOC documentation for the
    details.

    Args:
      label: Class id being evaluated. It is cast to `glabels.dtype` before
        any comparison, so a Python int or a tensor of another integer
        dtype is accepted.
      scores: Detection scores; only their size and shape are used here.
        Expected to be sorted by decreasing score.
      bboxes: Detected boxes, indexed along the first axis.
      glabels: Groundtruth labels. Entries whose label differs from `label`
        (for instance zero padding) get a zero overlap and cannot match.
      gbboxes: Groundtruth bounding boxes, forwarded to `bboxes_jaccard`.
      gdifficults: Groundtruth "difficult" flags; cast to bool.
      matching_threshold: Overlap strictly above this value is a match.
      scope: Optional name scope.

    Return:
      Tuple of:
        n_gbboxes: Scalar Tensor with the number of groundtruth boxes of
          class `label` that are not difficult.
        tp_match: boolean Tensor shaped like `scores`, True Positives.
        fp_match: boolean Tensor shaped like `scores`, False Positives.
    """
    with tf.name_scope(scope, 'bboxes_matching_single',
                       [scores, bboxes, glabels, gbboxes]):
        rsize = tf.size(scores)
        rshape = tf.shape(scores)
        rlabel = tf.cast(label, glabels.dtype)
        # Number of groundtruth boxes. The cast label is used so that the
        # count and the matching loop compare against the same dtype.
        gdifficults = tf.cast(gdifficults, tf.bool)
        n_gbboxes = tf.count_nonzero(
            tf.logical_and(tf.equal(glabels, rlabel),
                           tf.logical_not(gdifficults)))
        # Groundtruth matching arrays.
        gmatch = tf.zeros(tf.shape(glabels), dtype=tf.bool)
        grange = tf.range(tf.size(glabels), dtype=tf.int32)
        # True/False positive matching TensorArrays.
        sdtype = tf.bool
        ta_tp_bool = tf.TensorArray(sdtype, size=rsize,
                                    dynamic_size=False, infer_shape=True)
        ta_fp_bool = tf.TensorArray(sdtype, size=rsize,
                                    dynamic_size=False, infer_shape=True)

        # Loop over returned objects.
        def m_condition(i, ta_tp, ta_fp, gmatch):
            r = tf.less(i, rsize)
            return r

        def m_body(i, ta_tp, ta_fp, gmatch):
            # Jaccard score with groundtruth bboxes, zeroed for groundtruth
            # boxes of another class.
            rbbox = bboxes[i]
            jaccard = bboxes_jaccard(rbbox, gbboxes)
            jaccard = jaccard * tf.cast(tf.equal(glabels, rlabel),
                                        dtype=jaccard.dtype)

            # Best fit, checking it's above threshold.
            idxmax = tf.cast(tf.argmax(jaccard, axis=0), tf.int32)
            jcdmax = jaccard[idxmax]
            match = jcdmax > matching_threshold
            existing_match = gmatch[idxmax]
            not_difficult = tf.logical_not(gdifficults[idxmax])

            # TP: match & no previous match and FP: previous match | no match.
            # If difficult: no record, i.e FP=False and TP=False.
            tp = tf.logical_and(
                not_difficult,
                tf.logical_and(match, tf.logical_not(existing_match)))
            ta_tp = ta_tp.write(i, tp)
            fp = tf.logical_and(
                not_difficult,
                tf.logical_or(existing_match, tf.logical_not(match)))
            ta_fp = ta_fp.write(i, fp)

            # Update groundtruth match.
            mask = tf.logical_and(tf.equal(grange, idxmax),
                                  tf.logical_and(not_difficult, match))
            gmatch = tf.logical_or(gmatch, mask)
            return [i + 1, ta_tp, ta_fp, gmatch]

        # Main loop definition. Iterations are forced to run one at a time
        # because each one reads the `gmatch` written by the previous one.
        i = 0
        [i, ta_tp_bool, ta_fp_bool, gmatch] = \
            tf.while_loop(m_condition, m_body,
                          [i, ta_tp_bool, ta_fp_bool, gmatch],
                          parallel_iterations=1,
                          back_prop=False)

        # TensorArrays to Tensors and reshape.
        tp_match = tf.reshape(ta_tp_bool.stack(), rshape)
        fp_match = tf.reshape(ta_fp_bool.stack(), rshape)
        return n_gbboxes, tp_match, fp_match
def body(time, outputs_ta, state, inputs, finished, sequence_lengths):
    r"""One iteration of the decoding while_loop.

    Reads `decoder`, `maximum_iterations`, `impute_finished` and
    `zero_outputs` from the enclosing scope.

    Args:
        time: scalar int32 tensor, the current step.
        outputs_ta: structure of TensorArray collecting the emitted outputs.
        state: (structure of) state tensors and TensorArrays.
        inputs: (structure of) input tensors for this step.
        finished: bool tensor marking the entries that are already done.
        sequence_lengths: int32 tensor holding the finishing time of every
            entry.

    Returns:
        `(time + 1, outputs_ta, next_state, next_inputs, next_finished,
        next_sequence_lengths)`.
    """
    step_outputs, stepped_state = decoder.step(time, inputs, state)

    # Once the last allowed step is reached the next inputs are not computed:
    # everything is declared finished and inputs/state are passed along.
    is_last_step = tf.equal(time+1, maximum_iterations)

    def _stop_here():
        return (tf.cast(tf.ones_like(finished), tf.bool),
                inputs, stepped_state)

    def _advance():
        return decoder.next_inputs(time, step_outputs, stepped_state)

    decoder_finished, next_inputs, decoder_state = tf.cond(
        is_last_step, _stop_here, _advance)

    next_finished = (
        decoder_finished if decoder.tracks_own_finished
        else tf.logical_or(decoder_finished, finished))

    # Entries that were still running get their length bumped to time + 1.
    still_running = tf.logical_not(finished)
    bumped_lengths = tf.fill(tf.shape(sequence_lengths), time + 1)
    next_sequence_lengths = tf.where(
        still_running, bumped_lengths, sequence_lengths)

    nest.assert_same_structure(stepped_state, decoder_state)
    nest.assert_same_structure(outputs_ta, step_outputs)
    nest.assert_same_structure(inputs, next_inputs)

    def _carry_if_finished(new, cur):
        # TensorArrays and scalar states are handed over untouched; any
        # other state keeps its previous value where `finished` is set.
        if isinstance(cur, tf.TensorArray):
            return new
        new.set_shape(cur.shape)
        if new.shape.ndims == 0:
            return new
        return tf.where(finished, cur, new)

    if impute_finished:
        # Outputs past the finishing step are replaced by zeros and the
        # state is frozen.
        emit = nest.map_structure(
            lambda out, zero: tf.where(finished, zero, out),
            step_outputs,
            zero_outputs)
        next_state = nest.map_structure(
            _carry_if_finished, decoder_state, stepped_state)
    else:
        emit = step_outputs
        next_state = decoder_state

    outputs_ta = nest.map_structure(
        lambda ta, out: ta.write(time, out), outputs_ta, emit)

    return (time + 1, outputs_ta, next_state, next_inputs,
            next_finished, next_sequence_lengths)
def __call__(self, inputs, state, scope=None):
    """Run one decoding step of the copy-augmented cell.

    The wrapped cell is fed the decoder input concatenated with a
    "selective read" of the encoder outputs. Its output is then scored
    against both the generation vocabulary and the source positions, and
    both score sets are normalised by a single softmax.

    Args:
        inputs: decoder input for this step; the selective read is appended
            along the last axis.
        state: a `CopyNetWrapperState` from the previous step.
        scope: forwarded to the wrapped cell.

    Returns:
        `(outputs, state)` where `outputs` are probabilities (not logits)
        over the whole vocabulary and `state` is the next
        `CopyNetWrapperState`.

    Raises:
        TypeError: if `state` is not a `CopyNetWrapperState`.
    """
    if not isinstance(state, CopyNetWrapperState):
        raise TypeError(
            'Expected state to be instance of CopyNetWrapperState. '
            'Received type {} instead.'
            .format(type(state)))
    prev_cell_state = state.cell_state
    prev_time = state.time
    prev_predicted_ids = state.predicted_ids
    prev_alignments = state.alignments
    prev_coverage = state.coverage
    prev_alignment_history = state.alignment_history

    # Selective read: attend only to the source positions holding the token
    # predicted at the previous step, weighted by the previous copy
    # probabilities. `divide_no_nan` leaves rows without any hit at zero.
    hit_mask = tf.cast(
        tf.equal(prev_predicted_ids, self._encoder_input_ids), tf.float32)
    hit_mask = tf.math.divide_no_nan(
        hit_mask, tf.reduce_sum(hit_mask, axis=-1, keepdims=True))
    rou = hit_mask * prev_alignments
    selective_read = tf.einsum('ijk,ij->ik', self._encoder_outputs, rou)
    inputs = tf.concat(
        [inputs, selective_read],
        axis=-1)  # (batch_size, embedding_size + encoder_state_size)

    cell_outputs, cell_state = self._cell(inputs, prev_cell_state, scope)
    generate_score = self._projection(
        cell_outputs)  # (batch_size, gen_vocab_size)

    copy_score = tf.einsum('ijk,km->ijm', self._encoder_outputs,
                           self._copy_weight)
    copy_score = tf.nn.tanh(copy_score)
    copy_score = tf.einsum('ijm,im->ij', copy_score,
                           cell_outputs)  # (batch_size, seq_len)

    if self._encoder_input_length is not None:
        # Push the score of padded source positions towards -inf. `maxlen`
        # is pinned to the width of `copy_score`; without it the mask is
        # only as wide as the longest length in the batch and cannot be
        # added when the encoder outputs are padded further.
        valid = tf.sequence_mask(self._encoder_input_length,
                                 maxlen=tf.shape(copy_score)[1])
        padding = tf.cast(tf.logical_not(valid), dtype=tf.float32)
        copy_score += -1e9 * padding

    # One softmax over "generate" and "copy" scores together.
    mixed_score = tf.concat([generate_score, copy_score], axis=-1)
    mixed_prob = tf.math.softmax(mixed_score, axis=-1)
    generate_prob = mixed_prob[:, :self._gen_vocab_size]
    copy_prob = mixed_prob[:, self._gen_vocab_size:]

    # expand probability to [batch_size, whole_vocab_size]
    expanded_generate_prob = tf.pad(
        generate_prob,
        [[0, 0], [0, self._whole_vocab_size - self._gen_vocab_size]])
    expanded_copy_prob = self._expand_copy_prob(copy_prob)
    # the output is probability not logits
    outputs = expanded_generate_prob + expanded_copy_prob

    predicted_ids = tf.expand_dims(
        tf.argmax(outputs, axis=-1, output_type=tf.int32), axis=-1)
    alignments = copy_prob
    coverage = prev_coverage + copy_prob
    if self._alignment_history:
        alignment_history = prev_alignment_history.write(
            prev_time, copy_prob)
    else:
        alignment_history = prev_alignment_history

    state = CopyNetWrapperState(cell_state=cell_state,
                                time=prev_time + 1,
                                predicted_ids=predicted_ids,
                                alignments=alignments,
                                coverage=coverage,
                                alignment_history=alignment_history)
    return outputs, state
def main(_):
    """Build the multi-tower RON training graph and start training.

    Reads all settings from `FLAGS`: builds the input pipeline, creates one
    network tower per GPU, averages the tower gradients and hands the
    resulting train op to `slim.learning.train`.

    Raises:
        ValueError: if `--data_dir` is not set.
    """
    if not FLAGS.data_dir:
        raise ValueError('You must supply the dataset directory with --data_dir')
    # Build at least one tower even when --num_gpus is zero or negative.
    num_gpus = FLAGS.num_gpus
    if num_gpus < 1: num_gpus = 1

    # Disabled parameter-server / worker cluster setup, kept for reference:
    # ps_spec = FLAGS.ps_hosts.split(",")
    # worker_spec = FLAGS.worker_hosts.split(",")
    # num_workers = len(worker_spec)
    # cluster = tf.train.ClusterSpec({
    #     "ps": ps_spec,
    #     "worker": worker_spec})
    # server = tf.train.Server(cluster, job_name=FLAGS.job_name, task_index=FLAGS.task_index)
    # if FLAGS.job_name == "ps":
    #     with tf.device("/cpu:0"):
    #         server.join()
    #     return

    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.device('/cpu:0'):
        global_step = slim.create_global_step()

        # Select the dataset.
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.data_dir)

        # Get the RON network and its anchors.
        ron_class = nets_factory.get_network(FLAGS.model_name)
        ron_params = ron_class.default_params._replace(num_classes=FLAGS.num_classes)
        ron_net = ron_class(ron_params)
        ron_shape = ron_net.params.img_shape
        ron_anchors = ron_net.anchors(ron_shape)

        # =================================================================== #
        # Create a dataset provider and batches.
        # =================================================================== #
        with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=120 * FLAGS.batch_size * num_gpus,
                common_queue_min=80 * FLAGS.batch_size * num_gpus,
                shuffle=True)
        # Get for RON network: image, labels, bboxes.
        # (ymin, xmin, ymax, xmax) for gbboxes
        [image, shape, glabels, gbboxes, isdifficult] = provider.get(['image', 'shape',
                                                                     'object/label',
                                                                     'object/bbox',
                                                                     'object/difficult'])
        # Mask selecting the objects to keep. When no entry of `isdifficult`
        # differs from 1 (every object is flagged), only the first object
        # is kept; otherwise the entries with isdifficult < 1 are kept.
        isdifficult_mask =tf.cond(tf.reduce_sum(tf.cast(tf.logical_not(tf.equal(tf.ones_like(isdifficult), isdifficult)), tf.float32)) < 1.,
                                  lambda : tf.one_hot(0, tf.shape(isdifficult)[0], on_value=True, off_value=False, dtype=tf.bool),
                                  lambda : isdifficult < tf.ones_like(isdifficult))

        glabels = tf.boolean_mask(glabels, isdifficult_mask)
        gbboxes = tf.boolean_mask(gbboxes, isdifficult_mask)

        # Select the preprocessing function.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        # Pre-processing image, labels and bboxes.
        image, glabels, gbboxes = image_preprocessing_fn(image, glabels, gbboxes,
                                                         out_shape=ron_shape,
                                                         data_format=DATA_FORMAT)
        # Encode groundtruth labels and bboxes.
        # glocalisations is our regression object
        # gclasses is the ground-truth label
        # gscores is the jaccard score with the ground truth
        gclasses, glocalisations, gscores = \
            ron_net.bboxes_encode(glabels, gbboxes, ron_anchors, positive_threshold=FLAGS.match_threshold, ignore_threshold=FLAGS.neg_threshold)

        # Number of tensors per batch element:
        # one image, then one entry per anchor layer for each of
        # gclasses, glocalisations and gscores.
        batch_shape = [1] + [len(ron_anchors)] * 3

        # Training batches and queue.
        r = tf.train.batch(
            tf_utils.reshape_list([image, gclasses, glocalisations, gscores]),
            batch_size=FLAGS.batch_size * num_gpus,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=120 * FLAGS.batch_size * num_gpus)

        all_batch = tf_utils.reshape_list(r, batch_shape)
        # Split the big batch along axis 0 into one slice per GPU. After the
        # zip(*...) each b_* list is indexed by GPU first, then by layer.
        b_image = tf.split(all_batch[0], num_or_size_splits=num_gpus, axis=0)
        _b_gclasses = [tf.split(b, num_or_size_splits=num_gpus, axis=0) for b in all_batch[1]]
        b_gclasses = [_ for _ in zip(*_b_gclasses)]
        _b_glocalisations = [tf.split(b, num_or_size_splits=num_gpus, axis=0) for b in all_batch[2]]
        b_glocalisations = [_ for _ in zip(*_b_glocalisations)]
        _b_gscores = [tf.split(b, num_or_size_splits=num_gpus, axis=0) for b in all_batch[3]]
        b_gscores = [_ for _ in zip(*_b_gscores)]

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # NOTE(review): UPDATE_OPS is read here, before any tower is built,
        # so ops registered later by the network (e.g. batch-norm updates,
        # if the model has any) would not be in this list -- confirm.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # =================================================================== #
        # Configure the optimization procedure.
        # =================================================================== #
        learning_rate = tf_utils.configure_learning_rate(FLAGS,
                                                         dataset.num_samples,
                                                         global_step)
        optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
        summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        # Construct RON network.
        arg_scope = ron_net.arg_scope(weight_decay=FLAGS.weight_decay, data_format=DATA_FORMAT)

        # The first tower creates the variables; later towers reuse them.
        reuse_variables = False
        tower_grads = []
        loss_list = []
        with slim.arg_scope(arg_scope):
            for index in range(num_gpus):
                with tf.device('/gpu:%d' % index):
                    predictions, logits, objness_pred, objness_logits, localisations, end_points = ron_net.net(b_image[index], is_training=True, reuse = reuse_variables)
                    # Add loss function.
                    ron_net.losses(logits, localisations, objness_logits, objness_pred,
                                   b_gclasses[index], b_glocalisations[index], b_gscores[index],
                                   match_threshold = FLAGS.match_threshold,
                                   neg_threshold = FLAGS.neg_threshold,
                                   objness_threshold = FLAGS.objectness_thres,
                                   negative_ratio=FLAGS.negative_ratio,
                                   alpha=FLAGS.loss_alpha,
                                   beta=FLAGS.loss_beta,
                                   label_smoothing=FLAGS.label_smoothing)
                    reuse_variables = True
                    # NOTE(review): get_total_loss() is called without a
                    # scope, so from the second tower on it presumably also
                    # sums the losses registered by the earlier towers --
                    # verify for num_gpus > 1.
                    loss = tf.losses.get_total_loss()
                    loss_list.append(loss)
                    # Variables to train.
                    variables_to_train = tf_utils.get_variables_to_train(FLAGS)
                    # Create gradient updates.
                    grads = optimizer.compute_gradients(loss, variables_to_train)
                    tower_grads.append(grads)

        # Combine the per-tower gradients and losses.
        reduce_grads = average_gradients(tower_grads)
        total_loss = tf.reduce_mean(tf.stack(loss_list, axis=0), axis=0)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # =================================================================== #
        # Configure the moving averages.
        # =================================================================== #
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        if FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(variable_averages.apply(moving_average_variables))

        grad_updates = optimizer.apply_gradients(reduce_grads, global_step=global_step)
        update_ops.append(grad_updates)

        # Evaluating `train_tensor` runs every update op and then yields
        # the averaged loss.
        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op], total_loss,
                                                          name='train_op')

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # =================================================================== #
        # Kicks off the training.
        # =================================================================== #
        config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)

        saver = tf.train.Saver(max_to_keep=5,
                               keep_checkpoint_every_n_hours = FLAGS.save_interval_secs/3600.,
                               write_version=2,
                               pad_step_number=False)

        # Initial weights come from 'vgg_16.ckpt' inside the data directory
        # (path handed to tf_utils.get_init_fn).
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.model_dir,
            master='',
            is_chief=True,
            init_fn=tf_utils.get_init_fn(FLAGS, os.path.join(FLAGS.data_dir, 'vgg_16.ckpt')),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            saver=saver,
            save_interval_secs=FLAGS.save_interval_secs,
            session_config=config,
            session_wrapper=None,
            sync_optimizer=None)
def condition2(sigma, ak, am):
    """Loop condition: True while `sigma @ testOp2` is not all zeros.

    Args:
        sigma: tensor multiplied on the right by the module-level `testOp2`.
        ak: loop variable, not read here.
        am: loop variable, not read here.

    Returns:
        A scalar boolean tensor, False only when every entry of the product
        equals the float64 zero vector of length 4.
    """
    projected = tf.matmul(sigma, testOp2)
    zero_row = tf.zeros([4], dtype=tf.float64)
    is_all_zero = tf.reduce_all(tf.equal(projected, zero_row))
    return tf.logical_not(is_all_zero)
def rpn_losses(self):
    """Build the RPN losses on a sampled minibatch of anchors.

    Besides the losses, image summaries of the positive anchors, the
    negative anchors and the 20 highest-scoring decoded boxes are emitted.

    Returns:
        `(location_loss, classification_loss, rpn_predict_boxes,
        rpn_predict_scores)`; the last two are the top-20 boxes/scores whose
        score exceeds `cfgs.FINAL_SCORE_THRESHOLD`.
    """
    with tf.variable_scope('rpn_losses'):
        sampled = self.make_minibatch(self.anchors)
        mb_indices, mb_matched_gtboxes, object_mask, mb_onehot_labels = sampled

        mb_anchors = tf.gather(self.anchors, mb_indices)
        mb_pred_encoded = tf.gather(self.rpn_encode_boxes, mb_indices)
        mb_pred_scores = tf.gather(self.rpn_scores, mb_indices)

        # regression targets: groundtruth boxes encoded w.r.t. the anchors
        mb_gt_encoded = encode_and_decode.encode_boxes(
            unencode_boxes=mb_matched_gtboxes,
            reference_boxes=mb_anchors,
            scale_factors=self.scale_factors)

        # visualisation of the positive anchors (negatives are zeroed out)
        positive_boxes = mb_anchors * tf.expand_dims(object_mask, 1)
        positive_count = tf.shape(tf.where(tf.equal(object_mask, 1.0)))[0]
        positive_anchors_in_img = draw_box_with_color(
            self.img_batch, positive_boxes, text=positive_count)

        # ... and of the negative ones
        negative_mask = tf.cast(
            tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
        negative_boxes = mb_anchors * tf.expand_dims(negative_mask, 1)
        negative_count = tf.shape(tf.where(tf.equal(object_mask, 0.0)))[0]
        negative_anchors_in_img = draw_box_with_color(
            self.img_batch, negative_boxes, text=negative_count)

        mb_pred_decoded = encode_and_decode.decode_boxes(
            encode_boxes=mb_pred_encoded,
            reference_boxes=mb_anchors,
            scale_factors=self.scale_factors)

        tf.summary.image('/positive_anchors', positive_anchors_in_img)
        tf.summary.image('/negative_anchors', negative_anchors_in_img)

        # 20 best boxes of the minibatch according to the column-1 softmax
        # score
        mb_softmax_scores = tf.gather(
            slim.softmax(self.rpn_scores), mb_indices)
        best_scores, best_indices = tf.nn.top_k(mb_softmax_scores[:, 1], k=20)
        best_boxes = tf.gather(mb_pred_decoded, best_indices)

        top_detections_in_img = draw_boxes_with_scores(
            self.img_batch, boxes=best_boxes, scores=best_scores)
        tf.summary.image('/top_20', top_detections_in_img)

        above_threshold = tf.greater(best_scores, cfgs.FINAL_SCORE_THRESHOLD)
        kept = tf.reshape(tf.where(above_threshold), [-1])
        rpn_predict_boxes = tf.gather(best_boxes, kept)
        rpn_predict_scores = tf.gather(best_scores, kept)

        # losses
        with tf.variable_scope('rpn_location_loss'):
            location_loss = losses.l1_smooth_losses(
                predict_boxes=mb_pred_encoded,
                gtboxes=mb_gt_encoded,
                object_weights=object_mask)
            # add smooth l1 loss to losses collection
            slim.losses.add_loss(location_loss)

        with tf.variable_scope('rpn_classification_loss'):
            classification_loss = slim.losses.softmax_cross_entropy(
                logits=mb_pred_scores,
                onehot_labels=mb_onehot_labels)

        return (location_loss, classification_loss,
                rpn_predict_boxes, rpn_predict_scores)
def custom_dynamic_rnn(cell, inputs, inputs_len, initial_state=None):
    """
    Implements a dynamic rnn that can store scores in the pointer network.

    The reason for a hand-written loop is that the raw_rnn or dynamic_rnn
    functions in Tensorflow seem to require the hidden unit and the memory
    unit to have the same dimension, so the scores cannot be stored directly
    in the hidden unit.

    Args:
        cell: RNN cell; called as `cell(cur_x, prev_state)` and expected to
            return `(scores, new_state)`
        inputs: the input sequence to rnn, transposed with `[1, 0, 2]` so
            that time becomes the leading axis
        inputs_len: valid length of every sequence in the batch
        initial_state: initial_state of the cell; when None,
            `cell.zero_state` is used
    Returns:
        outputs and state: the per-step scores transposed back to
        batch-major order, and the final state of the loop
    """
    batch_size, max_time = tf.shape(inputs)[0], tf.shape(inputs)[1]

    inputs_ta = tf.TensorArray(dtype=tf.float32, size=max_time)
    inputs_ta = inputs_ta.unstack(tf.transpose(inputs, [1, 0, 2]))
    # record cells
    emit_ta = tf.TensorArray(dtype=tf.float32, dynamic_size=True, size=0)
    # iter timesteps
    t0 = tf.constant(0, dtype=tf.int32)
    # initial state
    if initial_state is not None:
        s0 = initial_state
    else:
        s0 = cell.zero_state(batch_size, dtype=tf.float32)
    # Initial "finished" flags: nothing is finished before the first step.
    # This must be a live assignment because `f0` is a loop variable below.
    f0 = tf.zeros([batch_size], dtype=tf.bool)

    def loop_fn(t, prev_s, emit_ta, finished):
        """
        the loop function of rnn
        """
        cur_x = inputs_ta.read(t)
        # use the previous state and the current input to compute the scores
        # and the current state; the scores are what gets stored per step
        scores, cur_state = cell(cur_x, prev_s)

        # copy through: finished sequences emit zeros and keep their state
        scores = tf.where(finished, tf.zeros_like(scores), scores)

        if isinstance(cell, tc.rnn.LSTMCell):
            # the LSTM state tuple is unpacked as (cell state c, hidden h)
            cur_c, cur_h = cur_state
            prev_c, prev_h = prev_s
            cur_state = tc.rnn.LSTMStateTuple(tf.where(finished, prev_c, cur_c),
                                              tf.where(finished, prev_h, cur_h))
        else:
            cur_state = tf.where(finished, prev_s, cur_state)

        # store the scores of this step
        emit_ta = emit_ta.write(t, scores)
        # a sequence is finished once the next step reaches its length
        finished = tf.greater_equal(t + 1, inputs_len)
        return [t + 1, cur_state, emit_ta, finished]

    # run until every sequence in the batch is finished
    _, state, emit_ta, _ = tf.while_loop(
        cond=lambda _1, _2, _3, finished: tf.logical_not(tf.reduce_all(finished)),
        body=loop_fn,
        loop_vars=(t0, s0, emit_ta, f0),
        parallel_iterations=32,
        swap_memory=False)

    outputs = tf.transpose(emit_ta.stack(), [1, 0, 2])
    return outputs, state
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight,
                      coverage_penalty_weight):
    """Performs a single step of Beam Search Decoding.

    Args:
      time: Beam search time step, should start at 0. At time 0 we assume
        that all beams are equal and consider only the first beam for
        continuations.
      logits: Logits at the current time step. A tensor of shape
        `[batch_size, beam_width, vocab_size]`
      next_cell_state: The next state from the cell, e.g. an instance of
        AttentionWrapperState if the cell is attentional.
      beam_state: Current state of the beam search.
        An instance of `BeamSearchDecoderState`.
      batch_size: The batch size for this input.
      beam_width: Python int.  The size of the beams.
      end_token: The int32 end token.
      length_penalty_weight: Float weight to penalize length. Disabled with
        0.0.
      coverage_penalty_weight: Float weight to penalize the coverage of source
        sentence. Disabled with 0.0.

    Returns:
      A tuple `(output, next_state)`: a `BeamSearchDecoderOutput` holding the
      selected scores, word ids and parent beam ids, and the new
      `BeamSearchDecoderState`.
    """
    static_batch_size = tf.get_static_value(batch_size)

    # Calculate the current lengths of the predictions
    prediction_lengths = beam_state.lengths
    previously_finished = beam_state.finished
    not_finished = tf.logical_not(previously_finished)

    # Calculate the total log probs for the new hypotheses
    # Final Shape: [batch_size, beam_width, vocab_size]
    step_log_probs = tf.nn.log_softmax(logits)
    step_log_probs = _mask_probs(step_log_probs, end_token,
                                 previously_finished)
    total_probs = tf.expand_dims(beam_state.log_probs, 2) + step_log_probs

    # Calculate the continuation lengths by adding to all continuing beams.
    # Prefer the static vocab size; fall back to the dynamic shape.
    vocab_size = logits.shape.dims[-1].value or tf.shape(logits)[-1]
    # 0 at the end_token position, 1 everywhere else.
    lengths_to_add = tf.one_hot(
        indices=tf.fill([batch_size, beam_width], end_token),
        depth=vocab_size,
        on_value=np.int64(0),
        off_value=np.int64(1),
        dtype=tf.int64)
    add_mask = tf.cast(not_finished, tf.int64)
    # Beams that were already finished add nothing.
    lengths_to_add *= tf.expand_dims(add_mask, 2)
    new_prediction_lengths = (
        lengths_to_add + tf.expand_dims(prediction_lengths, 2))

    # Calculate the accumulated attention probabilities if coverage penalty is
    # enabled.
    accumulated_attention_probs = None
    attention_probs = get_attention_probs(next_cell_state,
                                          coverage_penalty_weight)
    if attention_probs is not None:
        # Zero out the attention of beams that were already finished.
        attention_probs *= tf.expand_dims(
            tf.cast(not_finished, tf.float32), 2)
        accumulated_attention_probs = (
            beam_state.accumulated_attention_probs + attention_probs)

    # Calculate the scores for each beam
    scores = _get_scores(
        log_probs=total_probs,
        sequence_lengths=new_prediction_lengths,
        length_penalty_weight=length_penalty_weight,
        coverage_penalty_weight=coverage_penalty_weight,
        finished=previously_finished,
        accumulated_attention_probs=accumulated_attention_probs)

    # NOTE(review): `time` is converted here but not read again in this
    # function.
    time = tf.convert_to_tensor(time, name="time")
    # During the first time step we only consider the initial beam
    scores_flat = tf.reshape(scores, [batch_size, -1])

    # Pick the next beams according to the specified successors function
    next_beam_size = tf.convert_to_tensor(
        beam_width, dtype=tf.int32, name="beam_width")
    next_beam_scores, word_indices = tf.math.top_k(
        scores_flat, k=next_beam_size)

    next_beam_scores.set_shape([static_batch_size, beam_width])
    word_indices.set_shape([static_batch_size, beam_width])

    # Pick out the probs, beam_ids, and states according to the chosen
    # predictions
    next_beam_probs = _tensor_gather_helper(
        gather_indices=word_indices,
        gather_from=total_probs,
        batch_size=batch_size,
        range_size=beam_width * vocab_size,
        gather_shape=[-1],
        name="next_beam_probs")
    # Note: just doing the following
    #   tf.to_int32(word_indices % vocab_size,
    #       name="next_beam_word_ids")
    # would be a lot cleaner but for reasons unclear, that hides the results of
    # the op which prevents capturing it with tfdbg debug ops.
    # `word_indices` index the flattened [beam_width * vocab_size] axis: the
    # remainder is the word id and the quotient is the parent beam.
    raw_next_word_ids = tf.math.floormod(
        word_indices, vocab_size, name="next_beam_word_ids")
    next_word_ids = tf.cast(raw_next_word_ids, tf.int32)
    next_beam_ids = tf.cast(
        word_indices / vocab_size, tf.int32, name="next_beam_parent_ids")

    # Append new ids to current predictions
    # Re-order the finished flags to follow the selected parent beams.
    previously_finished = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=previously_finished,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_finished = tf.logical_or(
        previously_finished,
        tf.equal(next_word_ids, end_token),
        name="next_beam_finished")

    # Calculate the length of the next predictions.
    # 1. Finished beams remain unchanged.
    # 2. Beams that are now finished (EOS predicted) have their length
    #    increased by 1.
    # 3. Beams that are not yet finished have their length increased by 1.
    lengths_to_add = tf.cast(tf.logical_not(previously_finished), tf.int64)
    next_prediction_len = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=beam_state.lengths,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_prediction_len += lengths_to_add
    # Empty tuple is the placeholder when the coverage penalty is disabled.
    next_accumulated_attention_probs = ()
    if accumulated_attention_probs is not None:
        next_accumulated_attention_probs = _tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=accumulated_attention_probs,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1],
            name="next_accumulated_attention_probs")

    # Pick out the cell_states according to the next_beam_ids. We use a
    # different gather_shape here because the cell_state tensors, i.e.
    # the tensors that would be gathered from, all have dimension
    # greater than two and we need to preserve those dimensions.
    next_cell_state = tf.nest.map_structure(
        lambda gather_from: _maybe_tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=gather_from,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1]),
        next_cell_state)

    next_state = BeamSearchDecoderState(
        cell_state=next_cell_state,
        log_probs=next_beam_probs,
        lengths=next_prediction_len,
        finished=next_finished,
        accumulated_attention_probs=next_accumulated_attention_probs)

    output = BeamSearchDecoderOutput(
        scores=next_beam_scores,
        predicted_ids=next_word_ids,
        parent_ids=next_beam_ids)

    return output, next_state
def compute_mask(self, inputs, mask=None):
    """Return a boolean mask that is True wherever `inputs` is not NaN.

    The incoming `mask` argument is accepted but not used.
    """
    nan_positions = tf.math.is_nan(inputs)
    return tf.logical_not(nan_positions)
def assign_and_sample_proposals(proposed_boxes,
                                gt_boxes,
                                gt_classes,
                                num_samples_per_image=512,
                                mix_gt_boxes=True,
                                fg_fraction=0.25,
                                fg_iou_thresh=0.5,
                                bg_iou_thresh_hi=0.5,
                                bg_iou_thresh_lo=0.0):
    """Assigns the proposals with groundtruth classes and performs subsampling.

    Given `proposed_boxes`, `gt_boxes`, and `gt_classes`, the function uses the
    following algorithm to generate the final `num_samples_per_image` RoIs.
      1. Calculates the IoU between each proposal box and each gt_boxes.
      2. Assigns each proposed box with a groundtruth class and box by choosing
         the largest IoU overlap.
      3. Samples `num_samples_per_image` boxes from all proposed boxes, and
         returns box_targets, class_targets, and RoIs.

    Args:
      proposed_boxes: a tensor of shape of [batch_size, N, 4]. N is the number
        of proposals before groundtruth assignment. The last dimension is the
        box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
        format.
      gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4].
        The coordinates of gt_boxes are in the pixel coordinates of the scaled
        image. This tensor might have padding of values -1 indicating the
        invalid box coordinates.
      gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES].
        This tensor might have paddings with values of -1 indicating the
        invalid classes.
      num_samples_per_image: an integer represents RoI minibatch size per
        image.
      mix_gt_boxes: a bool indicating whether to mix the groundtruth boxes
        before sampling proposals.
      fg_fraction: a float represents the target fraction of RoI minibatch
        that is labeled foreground (i.e., class > 0).
      fg_iou_thresh: a float represents the IoU overlap threshold for an RoI
        to be considered foreground (if >= fg_iou_thresh).
      bg_iou_thresh_hi: a float represents the IoU overlap threshold for an
        RoI to be considered background (class = 0 if overlap in [LO, HI)).
      bg_iou_thresh_lo: a float represents the IoU overlap threshold for an
        RoI to be considered background (class = 0 if overlap in [LO, HI)).

    Returns:
      sampled_rois: a tensor of shape of [batch_size, K, 4], representing the
        coordinates of the sampled RoIs, where K is the number of the sampled
        RoIs, i.e. K = num_samples_per_image.
      sampled_gt_boxes: a tensor of shape of [batch_size, K, 4], storing the
        box coordinates of the matched groundtruth boxes of the samples RoIs.
      sampled_gt_classes: a tensor of shape of [batch_size, K], storing the
        classes of the matched groundtruth boxes of the sampled RoIs.
      sampled_gt_indices: a tensor of shape of [batch_size, K], storing the
        indices of the sampled groundtruth boxes in the original `gt_boxes`
        tensor, i.e.
        gt_boxes[sampled_gt_indices[:, i]] = sampled_gt_boxes[:, i].
    """
    with tf.name_scope('sample_proposals'):
        if mix_gt_boxes:
            boxes = tf.concat([proposed_boxes, gt_boxes], axis=1)
        else:
            boxes = proposed_boxes

        (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
         matched_iou, _) = box_matching(boxes, gt_boxes, gt_classes)

        # Foreground is inclusive of the threshold, as documented above.
        # With a strict comparison and fg_iou_thresh == bg_iou_thresh_hi, an
        # RoI whose IoU equals the threshold would be neither foreground nor
        # background and would silently drop out of the candidate set.
        positive_match = tf.greater_equal(matched_iou, fg_iou_thresh)
        negative_match = tf.logical_and(
            tf.greater_equal(matched_iou, bg_iou_thresh_lo),
            tf.less(matched_iou, bg_iou_thresh_hi))
        ignored_match = tf.less(matched_iou, 0.0)

        # re-assign negatively matched boxes to the background class.
        matched_gt_classes = tf.where(
            negative_match, tf.zeros_like(matched_gt_classes),
            matched_gt_classes)
        matched_gt_indices = tf.where(
            negative_match, tf.zeros_like(matched_gt_indices),
            matched_gt_indices)

        sample_candidates = tf.logical_and(
            tf.logical_or(positive_match, negative_match),
            tf.logical_not(ignored_match))

        sampler = (
            balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
                positive_fraction=fg_fraction, is_static=True))

        # The per-image Python loop needs a statically known batch size.
        batch_size, _ = sample_candidates.get_shape().as_list()
        sampled_indicators = []
        for i in range(batch_size):
            sampled_indicator = sampler.subsample(
                sample_candidates[i], num_samples_per_image,
                positive_match[i])
            sampled_indicators.append(sampled_indicator)
        sampled_indicators = tf.stack(sampled_indicators)
        # top_k over the 0/1 indicators yields the positions of the sampled
        # boxes.
        _, sampled_indices = tf.nn.top_k(
            tf.cast(sampled_indicators, dtype=tf.int32),
            k=num_samples_per_image,
            sorted=True)

        # Pair every sampled index with its batch index for gather_nd.
        sampled_indices_shape = tf.shape(sampled_indices)
        batch_indices = (
            tf.expand_dims(tf.range(sampled_indices_shape[0]), axis=-1) *
            tf.ones([1, sampled_indices_shape[-1]], dtype=tf.int32))
        gather_nd_indices = tf.stack(
            [batch_indices, sampled_indices], axis=-1)

        sampled_rois = tf.gather_nd(boxes, gather_nd_indices)
        sampled_gt_boxes = tf.gather_nd(matched_gt_boxes, gather_nd_indices)
        sampled_gt_classes = tf.gather_nd(
            matched_gt_classes, gather_nd_indices)
        sampled_gt_indices = tf.gather_nd(
            matched_gt_indices, gather_nd_indices)

        return (sampled_rois, sampled_gt_boxes, sampled_gt_classes,
                sampled_gt_indices)
def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb,
                             lm_emb, char_index, text_len, speaker_ids, genre,
                             is_training, gold_starts, gold_ends, cluster_ids):
    """Build the span-ranking graph and return its predictions and loss.

    The method embeds the input, contextualizes it, enumerates candidate
    spans up to `self.max_span_width` words that stay inside one sentence,
    keeps the top-scoring spans, prunes antecedents, refines the span
    representations for `coref_depth` rounds and sums a softmax loss over
    the kept spans.

    Side effects: sets `self.dropout`, `self.lexical_dropout`,
    `self.lstm_dropout`, `self.lm_weights` and `self.lm_scaling`.

    Returns:
      A pair `(predictions, loss)` where `predictions` is the list
      `[candidate_starts, candidate_ends, candidate_mention_scores,
      top_span_starts, top_span_ends, top_antecedents,
      top_antecedent_scores]` and `loss` is a scalar.
    """
    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
    self.lexical_dropout = self.get_dropout(
        self.config["lexical_dropout_rate"], is_training)
    self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                         is_training)

    num_sentences = tf.shape(context_word_emb)[0]
    max_sentence_length = tf.shape(context_word_emb)[1]

    context_emb_list = [context_word_emb]
    head_emb_list = [head_word_emb]

    # Optional character-CNN embedding, appended to both embedding lists.
    if self.config["char_embedding_size"] > 0:
        char_emb = tf.gather(
            tf.get_variable(
                "char_embeddings",
                [len(self.char_dict), self.config["char_embedding_size"]]),
            char_index
        )  # [num_sentences, max_sentence_length, max_word_length, emb]
        flattened_char_emb = tf.reshape(char_emb, [
            num_sentences * max_sentence_length,
            util.shape(char_emb, 2),
            util.shape(char_emb, 3)
        ])  # [num_sentences * max_sentence_length, max_word_length, emb]
        flattened_aggregated_char_emb = util.cnn(
            flattened_char_emb, self.config["filter_widths"],
            self.config["filter_size"]
        )  # [num_sentences * max_sentence_length, emb]
        aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
            num_sentences, max_sentence_length,
            util.shape(flattened_aggregated_char_emb, 1)
        ])  # [num_sentences, max_sentence_length, emb]
        context_emb_list.append(aggregated_char_emb)
        head_emb_list.append(aggregated_char_emb)

    # Without a cached LM file the ELMo module is run in-graph and replaces
    # the `lm_emb` argument.
    if not self.lm_file:
        elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
        lm_embeddings = elmo_module(inputs={
            "tokens": tokens,
            "sequence_len": text_len
        }, signature="tokens", as_dict=True)
        word_emb = lm_embeddings[
            "word_emb"]  # [num_sentences, max_sentence_length, 512]
        # The word embedding is duplicated so it can be stacked with the two
        # LSTM layers.
        lm_emb = tf.stack([
            tf.concat([word_emb, word_emb], -1),
            lm_embeddings["lstm_outputs1"],
            lm_embeddings["lstm_outputs2"]
        ], -1)  # [num_sentences, max_sentence_length, 1024, 3]
    lm_emb_size = util.shape(lm_emb, 2)
    lm_num_layers = util.shape(lm_emb, 3)
    # Learned softmax-weighted mix of the LM layers plus a scalar scale.
    with tf.variable_scope("lm_aggregation"):
        self.lm_weights = tf.nn.softmax(
            tf.get_variable("lm_scores", [lm_num_layers],
                            initializer=tf.constant_initializer(0.0)))
        self.lm_scaling = tf.get_variable(
            "lm_scaling", [], initializer=tf.constant_initializer(1.0))
    flattened_lm_emb = tf.reshape(
        lm_emb,
        [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
    flattened_aggregated_lm_emb = tf.matmul(
        flattened_lm_emb, tf.expand_dims(
            self.lm_weights,
            1))  # [num_sentences * max_sentence_length * emb, 1]
    aggregated_lm_emb = tf.reshape(
        flattened_aggregated_lm_emb,
        [num_sentences, max_sentence_length, lm_emb_size])
    aggregated_lm_emb *= self.lm_scaling
    # The LM embedding is added to the context embedding only.
    context_emb_list.append(aggregated_lm_emb)

    context_emb = tf.concat(context_emb_list,
                            2)  # [num_sentences, max_sentence_length, emb]
    head_emb = tf.concat(head_emb_list,
                         2)  # [num_sentences, max_sentence_length, emb]
    context_emb = tf.nn.dropout(
        context_emb,
        self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
    head_emb = tf.nn.dropout(
        head_emb,
        self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

    text_len_mask = tf.sequence_mask(
        text_len,
        maxlen=max_sentence_length)  # [num_sentence, max_sentence_length]

    context_outputs = self.lstm_contextualize(
        context_emb, text_len, text_len_mask)  # [num_words, emb]
    num_words = util.shape(context_outputs, 0)

    genre_emb = tf.gather(
        tf.get_variable("genre_embeddings",
                        [len(self.genres), self.config["feature_size"]]),
        genre)  # [emb]

    sentence_indices = tf.tile(
        tf.expand_dims(tf.range(num_sentences), 1),
        [1, max_sentence_length])  # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(
        sentence_indices, text_len_mask)  # [num_words]
    flattened_head_emb = self.flatten_emb_by_sentence(
        head_emb, text_len_mask)  # [num_words]

    # Enumerate every (start, start + width) pair for widths below
    # max_span_width.
    candidate_starts = tf.tile(
        tf.expand_dims(tf.range(num_words), 1),
        [1, self.max_span_width])  # [num_words, max_span_width]
    candidate_ends = candidate_starts + tf.expand_dims(
        tf.range(self.max_span_width), 0)  # [num_words, max_span_width]
    candidate_start_sentence_indices = tf.gather(
        flattened_sentence_indices,
        candidate_starts)  # [num_words, max_span_width]
    # Ends are clamped for the lookup only; out-of-range ends are removed by
    # the mask below.
    candidate_end_sentence_indices = tf.gather(
        flattened_sentence_indices,
        tf.minimum(candidate_ends,
                   num_words - 1))  # [num_words, max_span_width]
    # Keep spans that end inside the document and do not cross a sentence
    # boundary.
    candidate_mask = tf.logical_and(
        candidate_ends < num_words,
        tf.equal(
            candidate_start_sentence_indices,
            candidate_end_sentence_indices))  # [num_words, max_span_width]
    flattened_candidate_mask = tf.reshape(
        candidate_mask, [-1])  # [num_words * max_span_width]
    candidate_starts = tf.boolean_mask(
        tf.reshape(candidate_starts, [-1]),
        flattened_candidate_mask)  # [num_candidates]
    candidate_ends = tf.boolean_mask(
        tf.reshape(candidate_ends, [-1]),
        flattened_candidate_mask)  # [num_candidates]
    candidate_sentence_indices = tf.boolean_mask(
        tf.reshape(candidate_start_sentence_indices, [-1]),
        flattened_candidate_mask)  # [num_candidates]

    candidate_cluster_ids = self.get_candidate_labels(
        candidate_starts, candidate_ends, gold_starts, gold_ends,
        cluster_ids)  # [num_candidates]

    candidate_span_emb = self.get_span_emb(
        flattened_head_emb, context_outputs, candidate_starts,
        candidate_ends)  # [num_candidates, emb]
    candidate_mention_scores = self.get_mention_scores(
        candidate_span_emb)  # [k, 1]
    candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                          1)  # [k]

    # Number of spans to keep: a fixed ratio of the number of words.
    k = tf.to_int32(
        tf.floor(
            tf.to_float(tf.shape(context_outputs)[0]) *
            self.config["top_span_ratio"]))
    top_span_indices = coref_ops.extract_spans(
        tf.expand_dims(candidate_mention_scores, 0),
        tf.expand_dims(candidate_starts, 0),
        tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0),
        util.shape(context_outputs, 0), True)  # [1, k]
    top_span_indices.set_shape([1, None])
    top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

    top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
    top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
    top_span_emb = tf.gather(candidate_span_emb,
                             top_span_indices)  # [k, emb]
    top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                     top_span_indices)  # [k]
    top_span_mention_scores = tf.gather(candidate_mention_scores,
                                        top_span_indices)  # [k]
    top_span_sentence_indices = tf.gather(candidate_sentence_indices,
                                          top_span_indices)  # [k]
    top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)  # [k]

    # Number of antecedents per span, capped by the number of kept spans.
    c = tf.minimum(self.config["max_top_antecedents"], k)

    if self.config["coarse_to_fine"]:
        top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(
            top_span_emb, top_span_mention_scores, c)
    else:
        top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(
            top_span_emb, top_span_mention_scores, c)

    # Score of the "no antecedent" option, fixed at zero.
    dummy_scores = tf.zeros([k, 1])  # [k, 1]
    # NOTE(review): `top_antecedent_scores` is only assigned inside this
    # loop, so `coref_depth` is presumably expected to be >= 1 — confirm.
    for i in range(self.config["coref_depth"]):
        # Variables are shared across refinement rounds after the first.
        with tf.variable_scope("coref_layer", reuse=(i > 0)):
            top_antecedent_emb = tf.gather(top_span_emb,
                                           top_antecedents)  # [k, c, emb]
            top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                top_span_emb, top_antecedents, top_antecedent_emb,
                top_antecedent_offsets, top_span_speaker_ids,
                genre_emb)  # [k, c]
            top_antecedent_weights = tf.nn.softmax(
                tf.concat([dummy_scores, top_antecedent_scores],
                          1))  # [k, c + 1]
            top_antecedent_emb = tf.concat(
                [tf.expand_dims(top_span_emb, 1), top_antecedent_emb],
                1)  # [k, c + 1, emb]
            attended_span_emb = tf.reduce_sum(
                tf.expand_dims(top_antecedent_weights, 2) *
                top_antecedent_emb, 1)  # [k, emb]
            with tf.variable_scope("f"):
                # Gate that interpolates between the old and the attended
                # span embedding.
                f = tf.sigmoid(
                    util.projection(
                        tf.concat([top_span_emb, attended_span_emb], 1),
                        util.shape(top_span_emb, -1)))  # [k, emb]
                top_span_emb = f * attended_span_emb + (
                    1 - f) * top_span_emb  # [k, emb]

    top_antecedent_scores = tf.concat(
        [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

    top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                           top_antecedents)  # [k, c]
    # log(0) for masked-out antecedents adds a hugely negative value to
    # their cluster id, so it cannot equal any real cluster id below.
    top_antecedent_cluster_ids += tf.to_int32(
        tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
    same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                      tf.expand_dims(
                                          top_span_cluster_ids,
                                          1))  # [k, c]
    non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                         1)  # [k, 1]
    pairwise_labels = tf.logical_and(same_cluster_indicator,
                                     non_dummy_indicator)  # [k, c]
    # The dummy label is on exactly when the span has no gold antecedent.
    dummy_labels = tf.logical_not(
        tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
    top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels],
                                      1)  # [k, c + 1]
    loss = self.softmax_loss(top_antecedent_scores,
                             top_antecedent_labels)  # [k]
    loss = tf.reduce_sum(loss)  # []

    return [
        candidate_starts, candidate_ends, candidate_mention_scores,
        top_span_starts, top_span_ends, top_antecedents,
        top_antecedent_scores
    ], loss
def ssd_losses(logits, localisations,
               gclasses, glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               scope=None):
    """Build the SSD loss terms and add them to the 'EXTRA_LOSSES' collection.

    For every feature block three terms are computed: a cross-entropy over
    the positive (matched) entries, a cross-entropy over a selected subset
    of negative entries, and a smooth-L1 localisation loss weighted by the
    positive mask. Each kind is summed over the blocks and registered in the
    collection, together with the total cross-entropy. Nothing is returned.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
      match_threshold: entries whose groundtruth score is greater than this
        value are treated as positives;
      negative_ratio: multiplier on the number of positives used when
        deciding how many negatives to keep;
      alpha: factor applied to the positive mask in the localisation weights;
      label_smoothing: accepted but not used here;
      scope: optional name scope.
    """
    with tf.name_scope(scope, 'ssd_losses'):
        pos_xent_terms = []
        neg_xent_terms = []
        loc_terms = []
        for i, block_logits in enumerate(logits):
            dtype = block_logits.dtype
            with tf.name_scope('block_%i' % i):
                block_scores = gscores[i]

                # Positives: groundtruth score above the match threshold.
                positive_mask = block_scores > match_threshold
                positive_weights = tf.cast(positive_mask, dtype)
                n_positives = tf.reduce_sum(positive_weights)

                # Labels for the negative term: the positive mask as
                # integers, i.e. 0 at every non-positive entry.
                negative_labels = tf.cast(positive_mask, tf.int32)
                predictions = slim.softmax(block_logits)

                # Candidate negatives: everything that is not positive and
                # whose score is above -0.5.
                candidate_negatives = tf.logical_and(
                    tf.logical_not(positive_mask),
                    block_scores > -0.5)
                candidate_weights = tf.cast(candidate_negatives, dtype)
                # Class-0 probability at the candidates, 1.0 elsewhere.
                class0_prob = tf.where(candidate_negatives,
                                       predictions[:, :, :, :, 0],
                                       1. - candidate_weights)
                class0_prob_flat = tf.reshape(class0_prob, [-1])

                # How many negatives to keep.
                n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
                n_neg = tf.maximum(n_neg, tf.size(class0_prob_flat) // 8)
                n_neg = tf.maximum(n_neg, tf.shape(class0_prob)[0] * 4)
                neg_upper_bound = 1 + tf.cast(
                    tf.reduce_sum(candidate_weights), tf.int32)
                n_neg = tf.minimum(n_neg, neg_upper_bound)

                # Keep candidates with the lowest class-0 probability.
                top_vals, _ = tf.nn.top_k(-class0_prob_flat, k=n_neg)
                cutoff = top_vals[-1]
                negative_mask = tf.logical_and(candidate_negatives,
                                               -class0_prob > cutoff)
                negative_weights = tf.cast(negative_mask, dtype)

                with tf.name_scope('cross_entropy_pos'):
                    pos_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=block_logits, labels=gclasses[i])
                    pos_xent = tf.losses.compute_weighted_loss(
                        pos_xent, positive_weights)
                    pos_xent_terms.append(pos_xent)

                with tf.name_scope('cross_entropy_neg'):
                    neg_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=block_logits, labels=negative_labels)
                    neg_xent = tf.losses.compute_weighted_loss(
                        neg_xent, negative_weights)
                    neg_xent_terms.append(neg_xent)

                # Localisation loss (smooth L1), on positives only.
                with tf.name_scope('localization'):
                    loc_weights = tf.expand_dims(alpha * positive_weights,
                                                 axis=-1)
                    loc_error = custom_layers.abs_smooth(
                        localisations[i] - glocalisations[i])
                    loc_terms.append(
                        tf.losses.compute_weighted_loss(loc_error,
                                                        loc_weights))

        # Sum every kind of term over the blocks and register the results.
        with tf.name_scope('total'):
            total_cross_pos = tf.add_n(pos_xent_terms, 'cross_entropy_pos')
            total_cross_neg = tf.add_n(neg_xent_terms, 'cross_entropy_neg')
            total_cross = tf.add(total_cross_pos, total_cross_neg,
                                 'cross_entropy')
            total_loc = tf.add_n(loc_terms, 'localization')

            for term in (total_cross_pos, total_cross_neg, total_cross,
                         total_loc):
                tf.add_to_collection('EXTRA_LOSSES', term)
class Dummy:
    # Empty attribute container; the graph's tensors and ops are attached to
    # an instance of it below.
    pass


env = Dummy()

# Training / evaluation graph, built under the 'model' variable scope.
with tf.variable_scope('model'):
    env.x = tf.placeholder(tf.float32, (None, img_size, img_size, img_chan),
                           name='x')
    env.y = tf.placeholder(tf.float32, (None, 1), name='y')
    # Defaults to False when not fed.
    env.training = tf.placeholder_with_default(False, (), name='mode')

    env.ybar = model(env.x, training=env.training)

    with tf.variable_scope('acc'):
        # A prediction counts as correct when label and output are on the
        # same side of zero.
        count = tf.logical_not(
            tf.logical_xor(tf.greater(env.y, 0.0),
                           tf.greater(env.ybar, 0.0)))
        env.acc = tf.reduce_mean(tf.cast(count, tf.float32), name='acc')

    env.loss = tf.losses.mean_squared_error(labels=env.y,
                                            predictions=env.ybar,
                                            scope='loss')

    with tf.variable_scope('train_op'):
        optimizer = tf.train.AdamOptimizer()
        env.train_op = optimizer.minimize(env.loss)

    env.saver = tf.train.Saver()

# Re-enter the 'model' scope with reuse=True so the adversarial graph uses
# the variables created above instead of creating new ones.
with tf.variable_scope('model', reuse=True):
    env.adv_epochs = tf.placeholder(tf.int32, (), name='adv_epochs')
    env.xadv = deepfool(model, env.x, epochs=env.adv_epochs, batch=True)
def adaptive_search(self, inputs, closed, last_beam_size, beam_size,
                    natural_order_tokens, natural_order_pos, **kwargs):
    """A function that implements a forward pass and updates the decoding
    partial sequence using a beam search, restricting the candidates to the
    tokens listed in natural_order_tokens

    Arguments:

    inputs: Dataclass
        a dataclass that stores partial decoding information that will
        be mutated by this layer during decoding
    closed: tf.Tensor
        a boolean tensor where true values indicate that a beam has
        finished decoding and should not be modified
    last_beam_size: int
        the number of beams that were expanded by the last layer in an
        autoregressive model
    beam_size: int
        the number of beams to be expanded by this layer in
        an autoregressive model
    natural_order_tokens: tf.Tensor
        a batch of sequences representing the word ids of tokens in
        natural order that are yet to be decoded; they are used here as
        vocabulary indices to build the mask over the logits.
    natural_order_pos: tf.Tensor
        a batch of sequences that accompanies natural_order_tokens
        (presumably the generation index of each token -- TODO confirm);
        in this function it is only re-ordered to follow the selected beams.

    Returns:

    decoding: Dataclass
        a dataclass that stores partial decoding information that will
        be mutated by this layer during decoding
    closed: tf.Tensor
        a boolean tensor where true values indicate that a beam has
        finished decoding and should not be modified
    beam_size: int
        the number of beams to be expanded by this layer in
        an autoregressive model
    natural_order_tokens: tf.Tensor
        natural_order_tokens re-ordered to follow the selected beams.
    natural_order_pos: tf.Tensor
        natural_order_pos re-ordered to follow the selected beams."""

    # unpack all the requires model inputs, some might be empty tensors:
    [
        queries, values, queries_mask, values_mask, ids, permutation,
        absolute_positions, relative_positions, pointer_labels,
        logits_labels, partial_pos, pointer_probs, log_probs,
        object_detections, object_features, object_boxes
    ] = inputs

    # compute a distribution over tokens
    logits = self.logits_before_softmax(queries, **kwargs)[:, -1]

    # calculate a mask over the vocab
    mask = tf.reduce_sum(tf.one_hot(natural_order_tokens,
                                    tf.shape(logits)[1], axis=2), axis=1)
    # make sure the mask is clipped to be 0.0 or 1.0
    mask = tf.clip_by_value(mask, 0.0, 1.0)
    # true if the mask contains <unk> or any word
    # (vocabulary id 1 is <unk>; ids from 4 onwards are words)
    is_token = tf.logical_or(
        tf.equal(mask[:, 1], 1),
        tf.reduce_any(tf.equal(mask[:, 4:], 1), axis=1))
    # true if the mask does not contain any words, and contains <end>
    # (vocabulary id 3 is <end>)
    is_end = tf.logical_and(tf.logical_not(is_token),
                            tf.equal(mask[:, 3], 1))
    # a mask that contains only the <end> token
    end_mask = tf.one_hot(tf.fill([tf.shape(logits)[0]], 3),
                          tf.shape(logits)[1], axis=1)
    # a mask that contains only the <pad> token (vocabulary id 0)
    pad_mask = tf.one_hot(tf.fill([tf.shape(logits)[0]], 0),
                          tf.shape(logits)[1], axis=1)
    # a mask that contains only words
    token_mask = tf.clip_by_value(mask - end_mask - pad_mask, 0.0, 1.0)
    # create a batch of different masks: only <end> when no word is left,
    # the remaining words when there are any, otherwise only <pad>
    mask = tf.where(
        is_end[:, tf.newaxis], end_mask,
        tf.where(is_token[:, tf.newaxis], token_mask, pad_mask))
    # convert the masks into offsets for the softmax op: 0 -> -\infty
    offset = (1.0 - mask) * 999999.0

    logits = tf.math.log_softmax(logits - offset)
    batch_size = tf.shape(logits)[0] // last_beam_size

    # sample the top beam_size candidates
    _log_probs, _ids = tf.math.top_k(logits, k=beam_size)

    # when a beam is closed all candidates are the same
    # this prevents the same candidates from being sampled twice
    # (only the first candidate keeps log probability 0)
    first = tf.one_hot(tf.fill(tf.shape(_log_probs)[:1], 0), beam_size)
    closed_log_probs = tf.where(tf.equal(first, 0),
                                tf.fill(tf.shape(first), -999999.),
                                tf.fill(tf.shape(first), 0.))

    # when a beam is closed special behavior is required
    # do not change the log probability and append only pad tokens
    mask = closed[:, tf.newaxis]
    _log_probs = tf.where(mask, closed_log_probs, _log_probs)
    _ids = tf.where(mask, tf.zeros_like(_ids), _ids)

    # manipulate the log probabilities to extract all possible
    # next beam candidates and their probability
    _log_probs = tf.reshape(_log_probs,
                            [batch_size, last_beam_size, beam_size])
    # add the running log probability of each old beam
    _log_probs = tf.reshape(log_probs,
                            [batch_size, last_beam_size, 1]) + _log_probs
    _log_probs = tf.reshape(_log_probs,
                            [batch_size, last_beam_size * beam_size])

    # select the top beam_size candidates
    _log_probs, beam_ids = tf.math.top_k(_log_probs, k=beam_size)

    # these indices may be a bit subtle; they work as follows
    # the last dim has last_beam_size * beam_size elements
    # the first beam_size elements represent candidate proposals
    # from a single original beam
    old_beam_ids = tf.math.floordiv(beam_ids, beam_size)

    # select the ids based on their beams that are from the beams with
    # highest log probability
    _ids = tf.reshape(_ids, [batch_size, last_beam_size * beam_size])
    _ids = tf.gather(_ids, beam_ids, batch_dims=1)
    _ids = tf.reshape(_ids, [batch_size * beam_size, 1])

    # this function helps select the hidden activations from
    # inputs that correspond to old selected beams
    # this is necessary because future layers may depend on activations
    # that are a function of which beam was selected
    def select(x):
        # None entries are passed through untouched
        if x is None:
            return x
        shape = tf.shape(x)[1:]
        # s0 splits the leading axis into [batch, old beam];
        # s1 merges it back as [batch * new beam]
        s0 = tf.concat([[batch_size, last_beam_size], shape], axis=0)
        s1 = tf.concat([[batch_size * beam_size], shape], axis=0)
        return tf.reshape(
            tf.gather(tf.reshape(x, s0), old_beam_ids, batch_dims=1), s1)

    # select which old beams are propagated forward
    # this is necessary because some beams have content-aware state
    # NOTE(review): pointer_probs is returned without passing through
    # select -- confirm this is intended.
    queries = select(queries)
    values = select(values)
    queries_mask = select(queries_mask)
    values_mask = select(values_mask)
    ids = select(ids)
    permutation = select(permutation)
    absolute_positions = select(absolute_positions)
    relative_positions = select(relative_positions)
    partial_pos = select(partial_pos)
    pointer_labels = select(pointer_labels)
    logits_labels = select(logits_labels)
    closed = select(closed)
    natural_order_tokens = select(natural_order_tokens)
    natural_order_pos = select(natural_order_pos)

    # TODO: Brandon -> handle the image features as well.
    object_detections = select(object_detections)
    object_features = select(object_features)
    object_boxes = select(object_boxes)

    # concatenate the sampled tokens to the beam and prepare the
    # model outputs for the next layer; also compute if we
    # has finished decoding by predicting the end token
    ids = tf.concat([ids, _ids], 1)
    log_probs = tf.reshape(_log_probs, [batch_size * beam_size])
    return ([
        queries, values, queries_mask, values_mask, ids, permutation,
        absolute_positions, relative_positions, pointer_labels,
        logits_labels, partial_pos, pointer_probs, log_probs,
        object_detections, object_features, object_boxes
    ], tf.logical_or(closed, tf.equal(_ids[:, 0], 3)), beam_size,
            natural_order_tokens, natural_order_pos)
def _calc_oicr_loss(self,
                    labels,
                    num_proposals,
                    proposals,
                    scores_0,
                    scores_1,
                    scope,
                    iou_threshold=0.5):
    """Calculates the OICR loss at refinement stage `i`.

    For every class, the proposal with the highest `scores_0` probability is
    taken as the seed; every proposal whose IoU with that seed is at least
    `iou_threshold` becomes a positive target for the class (only if the
    image-level label of the class is positive). Proposals with no positive
    class are assigned to background. The resulting normalized targets
    supervise `scores_1` through a softmax cross entropy.

    Args:
      labels: A [batch, num_classes] float tensor.
      num_proposals: A [batch] int tensor.
      proposals: A [batch, max_num_proposals, 4] float tensor.
      scores_0: A [batch, max_num_proposal, 1 + num_classes] float tensor,
        representing the proposal score at `k-th` refinement.
      scores_1: A [batch, max_num_proposal, 1 + num_classes] float tensor,
        representing the proposal score at `(k+1)-th` refinement.
      scope: Name passed to `tf.name_scope` for all ops created here.
      iou_threshold: Minimum IoU with the seed proposal for a proposal to be
        labelled positive.

    Returns:
      oicr_cross_entropy_loss: a scalar float tensor.
    """
    with tf.name_scope(scope):
        (batch, max_num_proposals,
         _) = utils.get_tensor_shape(scores_0)

        # Seed selection: index of the most confident valid proposal per class.
        # seed_ind shape = [batch, num_classes].
        proposal_mask = tf.sequence_mask(
            num_proposals, maxlen=max_num_proposals, dtype=tf.float32)
        foreground_probs = tf.nn.softmax(scores_0, axis=-1)[:, :, 1:]
        seed_ind = utils.masked_argmax(
            foreground_probs,
            tf.expand_dims(proposal_mask, axis=-1),
            dim=1)

        batch_ind = tf.range(batch, dtype=tf.int64)

        def _targets_for_class(seed_ind_per_class, label_per_class):
            # Targets of one class, shape = [batch, max_num_proposals].
            # Seed box of the class, shape = [batch, 4].
            gather_ind = tf.stack([batch_ind, seed_ind_per_class], axis=-1)
            seed_box = tf.gather_nd(proposals, gather_ind)

            # IoU between every proposal and the seed box of its image.
            seed_box_tiled = tf.tile(
                tf.expand_dims(seed_box, axis=1), [1, max_num_proposals, 1])
            overlap = box_utils.iou(
                tf.reshape(proposals, [-1, 4]),
                tf.reshape(seed_box_tiled, [-1, 4]))
            overlap = tf.reshape(overlap, [batch, max_num_proposals])

            # Zero the targets of classes that are absent from the image.
            is_close = tf.to_float(tf.greater_equal(overlap, iou_threshold))
            return tf.where(
                label_per_class > 0, x=is_close, y=tf.zeros_like(is_close))

        # proposal_labels shape = [batch, max_num_proposals, num_classes].
        per_class_inputs = zip(
            tf.unstack(seed_ind, axis=-1), tf.unstack(labels, axis=-1))
        proposal_labels = tf.stack(
            [_targets_for_class(ind, lab) for ind, lab in per_class_inputs],
            axis=-1)

        # Prepend the background column, then normalize each row to sum to 1.
        # proposal_labels shape = [batch, max_num_proposals, 1 + num_classes].
        bkg = tf.logical_not(tf.reduce_sum(proposal_labels, axis=-1) > 0)
        proposal_labels = tf.concat(
            [tf.expand_dims(tf.to_float(bkg), axis=-1), proposal_labels],
            axis=-1)
        row_total = tf.reduce_sum(proposal_labels, axis=-1, keepdims=True)
        proposal_labels = tf.div(proposal_labels, row_total)

        row_error = tf.abs(tf.reduce_sum(proposal_labels, axis=-1) - 1)
        assert_op = tf.Assert(
            tf.reduce_all(row_error < 1e-6),
            ["Probabilities not sum to ONE", proposal_labels])

        # The targets are constants for the optimizer (stop_gradient).
        with tf.control_dependencies([assert_op]):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                labels=tf.stop_gradient(proposal_labels), logits=scores_1)
            oicr_cross_entropy_loss = tf.reduce_mean(
                utils.masked_avg(data=losses, mask=proposal_mask, dim=1))

    return oicr_cross_entropy_loss
def loop_cond(i, decodes_BxT, unused_cache_BxU_dict):
    """Loop predicate: keep decoding while any row still lacks `eos_id`.

    Returns a boolean tensor that is true when `i` is below
    `max_decode_len` and at least one row of `decodes_BxT` contains no
    `eos_id` token. `eos_id` and `max_decode_len` come from the enclosing
    scope; the cache argument is ignored.
    """
    # Per-row flag: has this sequence produced the end-of-sequence id?
    row_has_eos_B = tf.reduce_any(tf.equal(decodes_BxT, eos_id), axis=1)
    below_limit = i < max_decode_len
    everything_finished = tf.reduce_all(row_has_eos_B)
    return tf.logical_and(below_limit, tf.logical_not(everything_finished))
def __invert__(self):
    """Implement the unary `~` operator as `tf.logical_not(self)`."""
    negated = tf.logical_not(self)
    return negated
def _fp(y_true, y_pred, typecast='float32'):
    """Count the false positives.

    A position contributes 1 when `y_pred` differs from `y_true` and
    `y_true` equals 0. The sum is returned as a scalar of dtype `typecast`.
    """
    mismatch = tf.logical_not(K.equal(y_true, y_pred))
    mismatch_as_num = K.cast(mismatch, typecast)
    truth_is_zero = K.cast(K.equal(y_true, 0), typecast)
    return K.cast(K.sum(mismatch_as_num * truth_is_zero), typecast)
def main(logdir='./logs/cla'):
    """Train the classifier and log running metrics to TensorBoard.

    Builds the input pipeline, the model, the loss and the confusion-matrix
    counters, restores the pretrained weights from './ResNet-L50.ckpt',
    then trains for `epochs` epochs. Every 25 steps the accuracy,
    sensitivity, specificity and mean loss are written to `logdir` and
    printed; a checkpoint is saved under 'ckpts/cla/' after every epoch.

    Relies on the module-level names `read_batch`, `preprocess`, `model`,
    `batch_size` and `epochs`.

    Args:
        logdir: Directory receiving the TensorBoard event files.
    """
    # NOTE(review): 900 is presumably the number of training images — confirm.
    samples_per_epoch = 900
    report_every = 25

    data_batch, label_batch = read_batch(
        './ISBI2016_ISIC_Part3B_Training_Data_tight_cropped',
        './ISBI2016_ISIC_Part3B_Training_GroundTruth.csv',
        batch_size)
    data, label = preprocess(data_batch, label_batch)
    result, pretrained_saver, keep_prob = model(data)

    with tf.name_scope('softmax_with_loss'):
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=result, labels=label, dim=1))
    train_op = tf.train.AdamOptimizer(0.0001).minimize(cross_entropy)

    with tf.name_scope('evaluation'):
        # argmax index 0 -> False (negative), any other index -> True.
        prediction = tf.cast(tf.argmax(result, 1), tf.bool)
        ground_truth = tf.cast(tf.argmax(label, 1), tf.bool)
        TP = tf.reduce_sum(
            tf.cast(tf.logical_and(prediction, ground_truth), tf.int32))
        TN = tf.reduce_sum(
            tf.cast(tf.logical_not(tf.logical_or(prediction, ground_truth)),
                    tf.int32))
        FP = tf.reduce_sum(
            tf.cast(tf.logical_and(prediction, tf.logical_not(ground_truth)),
                    tf.int32))
        FN = tf.reduce_sum(
            tf.cast(tf.logical_and(tf.logical_not(prediction), ground_truth),
                    tf.int32))

    summary_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
    saver = tf.train.Saver(max_to_keep=10, keep_checkpoint_every_n_hours=1)
    init = tf.global_variables_initializer()

    # Floor division keeps the Python 2 result and stays an int on Python 3.
    steps_per_epoch = samples_per_epoch // batch_size

    with tf.Session() as sess:
        sess.run(init)
        pretrained_saver.restore(sess, './ResNet-L50.ckpt')

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # NOTE(review): the confusion counters are never reset, so acc/se/sp
        # are running totals since the start of training, whereas `loss` is
        # averaged over the last 25 steps only — confirm this is intended.
        tp = 0
        tn = 0
        fp = 0
        fn = 0
        loss = 0
        try:
            for i in range(epochs):
                for j in range(steps_per_epoch):
                    TP_step, TN_step, FP_step, FN_step, loss_step, _ = sess.run(
                        [TP, TN, FP, FN, cross_entropy, train_op],
                        feed_dict={keep_prob: 0.5})
                    tp += TP_step
                    tn += TN_step
                    fp += FP_step
                    fn += FN_step
                    loss += loss_step

                    if j % report_every != report_every - 1:
                        continue

                    acc = (tp + tn) / float(tp + tn + fp + fn)
                    # Guard against a window with no positives / no negatives.
                    se = tp / float(tp + fn) if (tp + fn) else 0.0
                    sp = tn / float(tn + fp) if (tn + fp) else 0.0
                    loss /= 25.0
                    my_summary = tf.Summary(value=[
                        tf.Summary.Value(tag="accuracy", simple_value=acc),
                        tf.Summary.Value(tag="sensitivity", simple_value=se),
                        tf.Summary.Value(tag="specificity", simple_value=sp),
                        tf.Summary.Value(tag="loss", simple_value=loss),
                    ])
                    summary_writer.add_summary(
                        my_summary,
                        i * samples_per_epoch // batch_size + j)
                    # Single-argument print: same output on Python 2 and 3.
                    print('epoch %s batch %s' % (i + 1, j + 1))
                    print('accuracy %s cross_entropy %s' % (acc, loss))
                    loss = 0

                saver.save(
                    sess, 'ckpts/cla/cla',
                    global_step=(i + 1) * samples_per_epoch // batch_size)
        finally:
            # Always stop the input threads, even if training raised.
            coord.request_stop()
            coord.join(threads)
def batch_hard(dists, pids, margin, batch_precision_at_k=None):
    """Computes the batch-hard loss from arxiv.org/abs/1703.07737.

    Args:
        dists (2D tensor): A square all-to-all distance matrix as given by
            cdist.
        pids (1D tensor): The identities of the entries in `batch`, shape
            (B,). This can be of any type that can be compared, thus also a
            string.
        margin: The value of the margin if a number, alternatively the
            string 'soft' for using the soft-margin formulation, or `None`
            (or the string 'none', case-insensitive) for not using a margin
            at all.
        batch_precision_at_k: If `None`, only the loss is returned.
            Otherwise the `k` used for the within-batch precision-at-k
            monitoring values.

    Returns:
        If `batch_precision_at_k` is `None`: a 1D tensor of shape (B,)
        containing the loss value for each sample.
        Otherwise the tuple `(loss, top1, prec_at_k, topk_is_same,
        negative_dists, positive_dists)`.

    Raises:
        NotImplementedError: If `margin` is none of the supported values.
    """
    with tf.name_scope("batch_hard"):
        same_identity_mask = tf.equal(tf.expand_dims(pids, axis=1),
                                      tf.expand_dims(pids, axis=0))
        negative_mask = tf.logical_not(same_identity_mask)
        # Same identity, but excluding each sample's pairing with itself.
        positive_mask = tf.logical_xor(same_identity_mask,
                                       tf.eye(tf.shape(pids)[0], dtype=tf.bool))

        furthest_positive = tf.reduce_max(
            dists*tf.cast(positive_mask, tf.float32), axis=1)
        closest_negative = tf.map_fn(
            lambda x: tf.reduce_min(tf.boolean_mask(x[0], x[1])),
            (dists, negative_mask), tf.float32)
        # Another way of achieving the same, though more hacky:
        # closest_negative = tf.reduce_min(
        #     dists + 1e5*tf.cast(same_identity_mask, tf.float32), axis=1)

        diff = furthest_positive - closest_negative
        if isinstance(margin, numbers.Real):
            diff = tf.maximum(diff + margin, 0.0)
        elif margin == 'soft':
            diff = tf.nn.softplus(diff)
        elif margin is None or (
                hasattr(margin, 'lower') and margin.lower() == 'none'):
            # Documented "no margin" case. The `hasattr` guard keeps `None`
            # and other non-string values from raising AttributeError.
            pass
        else:
            raise NotImplementedError(
                'The margin {} is not implemented in batch_hard'.format(margin))

    if batch_precision_at_k is None:
        return diff

    # For monitoring, compute the within-batch top-1 accuracy and the
    # within-batch precision-at-k, which is somewhat more expressive.
    with tf.name_scope("monitoring"):
        # This is like argsort along the last axis. Add one to K as we'll
        # drop the diagonal.
        _, indices = tf.nn.top_k(-dists, k=batch_precision_at_k+1)

        # Drop the diagonal (distance to self is always least).
        indices = indices[:, 1:]

        # Generate the index indexing into the batch dimension.
        # This is something like [[0,0,0],[1,1,1],...,[B-1,B-1,B-1]]
        batch_index = tf.tile(
            tf.expand_dims(tf.range(tf.shape(indices)[0]), 1),
            (1, tf.shape(indices)[1]))

        # Stitch the above together with the argsort indices to get the
        # indices of the top-k of each row.
        topk_indices = tf.stack((batch_index, indices), -1)

        # See if the topk belong to the same person as they should, or not.
        topk_is_same = tf.gather_nd(same_identity_mask, topk_indices)

        topk_is_same_f32 = tf.cast(topk_is_same, tf.float32)
        top1 = tf.reduce_mean(topk_is_same_f32[:, 0])
        prec_at_k = tf.reduce_mean(topk_is_same_f32)

        # Finally, let's get some more info that can help in debugging while
        # we're at it!
        negative_dists = tf.boolean_mask(dists, negative_mask)
        positive_dists = tf.boolean_mask(dists, positive_mask)

        return diff, top1, prec_at_k, topk_is_same, negative_dists, positive_dists
def _fn(y_true, y_pred, typecast='float32'):
    """Count the false negatives.

    Sums `y_true` over the positions where `y_pred` differs from `y_true`
    and returns the result as a scalar of dtype `typecast`.
    NOTE(review): this is a count only if `y_true` holds 0/1 values —
    confirm against the callers.
    """
    mismatch = tf.logical_not(K.equal(y_true, y_pred))
    mismatch_as_num = K.cast(mismatch, typecast)
    return K.cast(K.sum(mismatch_as_num * y_true), typecast)
def connect_data_and_network(self,
                             outputs_collector=None,
                             gradients_collector=None):
    """Wire the samplers to the network for the current action.

    * training: picks the training or validation sampler with `tf.cond`
      (optionally applying mixup to training batches), builds the loss
      (data loss plus mean regularisation loss when weight decay is on),
      computes gradients for the non-frozen variables and registers
      gradients and losses with the collectors.
    * inference: runs the network, post-processes the output
      (SOFTMAX / ARGMAX / IDENTITY) and registers the window and its
      location as network outputs.
    * export: runs the network followed by a SOFTMAX post-processing layer.

    Args:
        outputs_collector: Collector receiving console / summary / network
            output tensors (used in training and inference).
        gradients_collector: Collector receiving the gradients (training).
    """
    print('connect data and network')

    def switch_sampler(for_training):
        # Pop a batch from the first (training) or last (validation) sampler.
        scope_name = 'train' if for_training else 'validation'
        with tf.name_scope(scope_name):
            chosen = self.get_sampler()[0][0 if for_training else -1]
            return chosen.pop_batch_op()

    def dense_one_hot(label_tensor):
        # Make the label dense and one-hot, as needed by dense losses.
        int_label = tf.cast(label_tensor, tf.int32)
        return tf.one_hot(
            tf.squeeze(int_label, axis=-1),
            depth=self.segmentation_param.num_classes)

    def mixup_switch_sampler(for_training):
        # First set of samples.
        batch = switch_sampler(for_training=for_training)
        mix_fields = ('image', 'weight', 'label')

        if not for_training:
            # Validation batches are never mixed, only one-hot encoded.
            with tf.name_scope('nomix'):
                batch['label'] = dense_one_hot(batch['label'])
            return batch

        with tf.name_scope('mixup'):
            # Mixing fraction drawn from Beta(alpha, alpha);
            # alpha == 1 gives a uniform distribution.
            alpha = self.segmentation_param.mixup_alpha
            mix_distribution = tf.distributions.Beta(alpha, alpha)
            mix_frac = mix_distribution.sample()

            # Second minibatch to blend with the first one.
            partner = switch_sampler(for_training=True)

            if self.segmentation_param.mix_match:
                # Sort both batches by summed label, in opposite directions.
                # NOTE(review): only the partner's labels are binarised
                # (`> 0`) before summing; the first batch sums raw label
                # values — confirm the asymmetry is intended.
                order_first = tf.argsort(
                    tf.map_fn(tf.reduce_sum,
                              tf.cast(batch['label'], tf.int64)))
                order_partner = tf.argsort(
                    tf.map_fn(tf.reduce_sum,
                              tf.cast(partner['label'] > 0, tf.int64)))
                for field in mix_fields:
                    if field not in batch:
                        continue
                    batch[field] = tf.gather(
                        batch[field], indices=order_first)
                    partner[field] = tf.gather(
                        partner[field], indices=order_partner[::-1])

            for samples in (batch, partner):
                samples['label'] = dense_one_hot(samples['label'])

            # Blend every relevant field that is present in the batch.
            blended = {}
            for field in mix_fields:
                if field in batch:
                    blended[field] = (batch[field] * mix_frac +
                                      partner[field] * (1 - mix_frac))
            batch.update(blended)
        return batch

    def run_network(batch):
        # Forward pass on the float32 image of `batch`.
        image = tf.cast(batch['image'], tf.float32)
        net_args = {
            'is_training': self.is_training,
            'keep_prob': self.net_param.keep_prob
        }
        return self.net(image, **net_args)

    if self.is_training:
        # Mixup (when enabled) is applied to training batches only.
        if self.segmentation_param.do_mixup:
            fetch_batch = mixup_switch_sampler
        else:
            fetch_batch = switch_sampler
        data_dict = tf.cond(tf.logical_not(self.is_validation),
                            lambda: fetch_batch(for_training=True),
                            lambda: fetch_batch(for_training=False))
        net_out = run_network(data_dict)

        with tf.name_scope('Optimiser'):
            optimiser_class = OptimiserFactory.create(
                name=self.action_param.optimiser)
            self.optimiser = optimiser_class.get_instance(
                learning_rate=self.action_param.lr)

        loss_func = LossFunction(
            n_class=self.segmentation_param.num_classes,
            loss_type=self.action_param.loss_type,
            softmax=self.segmentation_param.softmax)
        data_loss = loss_func(prediction=net_out,
                              ground_truth=data_dict.get('label'),
                              weight_map=data_dict.get('weight'))

        loss = data_loss
        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        if self.net_param.decay > 0.0 and reg_losses:
            mean_reg_loss = tf.reduce_mean(
                [tf.reduce_mean(term) for term in reg_losses])
            loss = data_loss + mean_reg_loss

        # Variables matching the freeze pattern are excluded from training.
        all_trainable = tf.trainable_variables()
        to_optimise = all_trainable
        freeze_pattern = (self.action_param.vars_to_freeze or
                          self.action_param.vars_to_restore)
        if freeze_pattern:
            import re
            freeze_regex = re.compile(freeze_pattern)
            to_optimise = [
                v for v in all_trainable if not freeze_regex.search(v.name)
            ]
            tf.logging.info(
                "Optimizing %d out of %d trainable variables, "
                "the other variables fixed (--vars_to_freeze %s)",
                len(to_optimise), len(all_trainable), freeze_pattern)

        grads = self.optimiser.compute_gradients(
            loss, var_list=to_optimise, colocate_gradients_with_ops=True)
        self.total_loss = loss

        # Register gradients and monitored values.
        gradients_collector.add_to_collection([grads])
        outputs_collector.add_to_collection(
            var=self.total_loss, name='total_loss',
            average_over_devices=True, collection=CONSOLE)
        outputs_collector.add_to_collection(
            var=self.total_loss, name='total_loss',
            average_over_devices=True, summary_type='scalar',
            collection=TF_SUMMARIES)
        outputs_collector.add_to_collection(
            var=data_loss, name='loss',
            average_over_devices=False, collection=CONSOLE)
        outputs_collector.add_to_collection(
            var=data_loss, name='loss',
            average_over_devices=True, summary_type='scalar',
            collection=TF_SUMMARIES)

    elif self.is_inference:
        # Convert logits into class probabilities or argmax labels.
        data_dict = switch_sampler(for_training=False)
        net_out = run_network(data_dict)

        output_prob = self.segmentation_param.output_prob
        num_classes = self.segmentation_param.num_classes
        if num_classes > 1:
            layer_kind = 'SOFTMAX' if output_prob else 'ARGMAX'
        else:
            layer_kind = 'IDENTITY'
        post_process_layer = PostProcessingLayer(
            layer_kind, num_classes=num_classes)
        net_out = post_process_layer(net_out)

        outputs_collector.add_to_collection(
            var=net_out, name='window',
            average_over_devices=False, collection=NETWORK_OUTPUT)
        outputs_collector.add_to_collection(
            var=data_dict['image_location'], name='location',
            average_over_devices=False, collection=NETWORK_OUTPUT)
        self.initialise_aggregator()

    elif self.is_export:
        data_dict = switch_sampler(for_training=False)
        num_classes = self.segmentation_param.num_classes
        net_out = run_network(data_dict)

        post_process_layer = PostProcessingLayer(
            'SOFTMAX', num_classes=num_classes)
        net_out = post_process_layer(net_out)
        self.initialise_aggregator()
def __init__(self, dataset, config):
    """Build the training graph, the session and the logging utilities.

    Args:
        dataset: Object providing `flat_inputs` (sequence of input tensors
            laid out as 4 * num_layers per-layer tensors followed by
            features, labels, input indices and cloud indices), `name` and
            `val_split`.
        config: Object providing `saving`, `saving_path`, `num_layers`,
            `num_classes`, `ignored_label_inds`, `learning_rate` and
            `train_sum_dir`.

    Side effects: may create the result folder, opens a log file in append
    mode (kept open as `self.Log_file`), creates a `tf.Session` and
    initialises all global variables.
    """
    flat_inputs = dataset.flat_inputs
    self.config = config

    # Path of the result folder
    if self.config.saving:
        if self.config.saving_path is None:
            self.saving_path = time.strftime(
                'results/Log_%Y-%m-%d_%H-%M-%S', time.gmtime())
        else:
            self.saving_path = self.config.saving_path
        if not exists(self.saving_path):
            makedirs(self.saving_path)

    with tf.variable_scope('inputs'):
        self.inputs = dict()
        num_layers = self.config.num_layers
        self.inputs['xyz'] = flat_inputs[:num_layers]
        self.inputs['neigh_idx'] = flat_inputs[num_layers:2 * num_layers]
        self.inputs['sub_idx'] = flat_inputs[2 * num_layers:3 * num_layers]
        self.inputs['interp_idx'] = flat_inputs[3 * num_layers:4 * num_layers]
        self.inputs['features'] = flat_inputs[4 * num_layers]
        self.inputs['labels'] = flat_inputs[4 * num_layers + 1]
        self.inputs['input_inds'] = flat_inputs[4 * num_layers + 2]
        self.inputs['cloud_inds'] = flat_inputs[4 * num_layers + 3]

        self.labels = self.inputs['labels']
        self.is_training = tf.placeholder(tf.bool, shape=())
        self.training_step = 1
        self.training_epoch = 0
        self.correct_prediction = 0
        self.accuracy = 0
        self.mIou_list = [0]
        self.class_weights = DP.get_class_weights(dataset.name)
        # Kept open for the lifetime of the object; closed by the owner.
        self.Log_file = open(
            'log_train_' + dataset.name + '_' + str(dataset.val_split) +
            time.strftime('_%Y-%m-%d_%H-%M-%S.txt', time.gmtime()), 'a')

    with tf.variable_scope('layers'):
        self.logits = self.inference(self.inputs, self.is_training)

    #####################################################################
    # Ignore the invalid point (unlabeled) when calculating the loss    #
    #####################################################################
    with tf.variable_scope('loss'):
        self.logits = tf.reshape(self.logits, [-1, config.num_classes])
        self.labels = tf.reshape(self.labels, [-1])

        # Boolean mask of points that should be ignored
        ignored_bool = tf.zeros_like(self.labels, dtype=tf.bool)
        for ign_label in self.config.ignored_label_inds:
            ignored_bool = tf.logical_or(ignored_bool,
                                         tf.equal(self.labels, ign_label))

        # Collect logits and labels that are not ignored.
        # tf.where on the 1-D mask yields [N, 1]; squeeze only axis 1 so a
        # single valid point stays a length-1 vector instead of a scalar.
        valid_idx = tf.squeeze(tf.where(tf.logical_not(ignored_bool)), axis=1)
        valid_logits = tf.gather(self.logits, valid_idx, axis=0)
        valid_labels_init = tf.gather(self.labels, valid_idx, axis=0)

        # Reduce label values in the range of logit shape: a placeholder 0
        # is inserted at each ignored label index of the lookup table.
        reducing_list = tf.range(self.config.num_classes, dtype=tf.int32)
        inserted_value = tf.zeros((1, ), dtype=tf.int32)
        for ign_label in self.config.ignored_label_inds:
            reducing_list = tf.concat([
                reducing_list[:ign_label], inserted_value,
                reducing_list[ign_label:]
            ], 0)
        valid_labels = tf.gather(reducing_list, valid_labels_init)

        self.loss = self.get_loss(valid_logits, valid_labels,
                                  self.class_weights)

    with tf.variable_scope('optimizer'):
        self.learning_rate = tf.Variable(config.learning_rate,
                                         trainable=False,
                                         name='learning_rate')
        self.train_op = tf.train.AdamOptimizer(
            self.learning_rate).minimize(self.loss)
        # Collected here only; running them is left to the caller.
        self.extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.variable_scope('results'):
        self.correct_prediction = tf.nn.in_top_k(valid_logits, valid_labels, 1)
        self.accuracy = tf.reduce_mean(
            tf.cast(self.correct_prediction, tf.float32))
        self.prob_logits = tf.nn.softmax(self.logits)

        tf.summary.scalar('learning_rate', self.learning_rate)
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('accuracy', self.accuracy)

    my_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    self.saver = tf.train.Saver(my_vars, max_to_keep=100)
    c_proto = tf.ConfigProto()
    c_proto.gpu_options.allow_growth = True
    self.sess = tf.Session(config=c_proto)
    self.merged = tf.summary.merge_all()
    self.train_writer = tf.summary.FileWriter(config.train_sum_dir,
                                              self.sess.graph)
    self.sess.run(tf.global_variables_initializer())