def rpn_cls_loss(rpn_cls_score, rpn_labels):
    '''
    Calculate the Region Proposal Network classifier loss. Measures how well
    the RPN is able to propose regions by the performance of its "objectness"
    classifier. Standard cross-entropy loss on logits.
    '''
    with tf.variable_scope('rpn_cls_loss'):
        # Input shape dimensions
        shape = tf.shape(rpn_cls_score)
        # Stack all classification scores into a 2D matrix
        rpn_cls_score = tf.transpose(rpn_cls_score, [0, 3, 1, 2])
        rpn_cls_score = tf.reshape(rpn_cls_score, [shape[0], 2, shape[3] // 2 * shape[1], shape[2]])
        rpn_cls_score = tf.transpose(rpn_cls_score, [0, 2, 3, 1])
        rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
        # Stack labels
        rpn_labels = tf.reshape(rpn_labels, [-1])
        # Ignore label=-1 (neither object nor background: IoU between 0.3 and 0.7)
        rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score, tf.where(tf.not_equal(rpn_labels, -1))), [-1, 2])
        rpn_labels = tf.reshape(tf.gather(rpn_labels, tf.where(tf.not_equal(rpn_labels, -1))), [-1])
        # Cross-entropy error
        rpn_cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score, labels=rpn_labels))
    return rpn_cross_entropy
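# A minimal usage sketch (an assumption-laden example, not part of the original
# code): TF1 graph mode, one image, a 4x4 feature map, and 3 anchors per
# location, so the score map has 2*3 channels and there are 4*4*3 anchor
# labels drawn from {-1, 0, 1}.
import numpy as np
import tensorflow as tf

scores = tf.placeholder(tf.float32, [1, 4, 4, 6])  # (N, H, W, 2*A)
labels = tf.placeholder(tf.int32, [1, 4, 4, 3])    # (N, H, W, A); -1 is ignored
loss = rpn_cls_loss(scores, labels)

with tf.Session() as sess:
    rng = np.random.RandomState(0)
    feed = {scores: rng.randn(1, 4, 4, 6).astype(np.float32),
            labels: rng.randint(-1, 2, size=(1, 4, 4, 3))}
    print(sess.run(loss, feed_dict=feed))  # scalar loss over non-ignored anchors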
def print_mask_parameter_counts():
    print("# Mask Parameter Counts")
    print(" - Mask1: {0}".format(
        sess.run(tf.reduce_sum(tf.to_float(tf.not_equal(indicator_matrix1, tf.zeros_like(indicator_matrix1)))))))
    print(" - Mask2: {0}".format(
        sess.run(tf.reduce_sum(tf.to_float(tf.not_equal(indicator_matrix2, tf.zeros_like(indicator_matrix2)))))))
    print(" - Mask3: {0}".format(
        sess.run(tf.reduce_sum(tf.to_float(tf.not_equal(indicator_matrix3, tf.zeros_like(indicator_matrix3)))))))
def retrieve_seq_length_op3(data, pad_val=0):
    """An op to compute the length of a sequence. The data shape can be
    [batch_size, n_step(max)] or [batch_size, n_step(max), n_features].

    If the data has type tf.string and pad_val is assigned as an empty string
    (''), this op will compute the length of the string sequence.

    Parameters
    ----------
    data : tensor
        [batch_size, n_step(max)] or [batch_size, n_step(max), n_features]
        with zero padding on the right hand side.
    pad_val
        By default 0. If the data is tf.string, please assign this as an
        empty string ('').

    Examples
    --------
    >>> data = [[[1],[2],[0],[0],[0]],
    >>>         [[1],[2],[3],[0],[0]],
    >>>         [[1],[2],[6],[1],[0]]]
    >>> data = tf.convert_to_tensor(data, dtype=tf.float32)
    >>> length = tl.layers.retrieve_seq_length_op3(data)
    [2, 3, 4]
    >>> data = [[[1,2],[2,2],[1,2],[1,2],[0,0]],
    >>>         [[2,3],[2,4],[3,2],[0,0],[0,0]],
    >>>         [[3,3],[2,2],[5,3],[1,2],[0,0]]]
    >>> data = tf.convert_to_tensor(data, dtype=tf.float32)
    >>> length = tl.layers.retrieve_seq_length_op3(data)
    [4, 3, 4]
    >>> data = [[1,2,0,0,0],
    >>>         [1,2,3,0,0],
    >>>         [1,2,6,1,0]]
    >>> data = tf.convert_to_tensor(data, dtype=tf.float32)
    >>> length = tl.layers.retrieve_seq_length_op3(data)
    [2, 3, 4]
    >>> data = [['hello','world','','',''],
    >>>         ['hello','world','tensorlayer','',''],
    >>>         ['hello','world','tensorlayer','2.0','']]
    >>> data = tf.convert_to_tensor(data, dtype=tf.string)
    >>> length = tl.layers.retrieve_seq_length_op3(data, pad_val='')
    [2, 3, 4]

    """
    data_shape_size = data.get_shape().ndims
    if data_shape_size == 3:
        return tf.reduce_sum(
            input_tensor=tf.cast(tf.reduce_any(input_tensor=tf.not_equal(data, pad_val), axis=2), dtype=tf.int32),
            axis=1
        )
    elif data_shape_size == 2:
        return tf.reduce_sum(input_tensor=tf.cast(tf.not_equal(data, pad_val), dtype=tf.int32), axis=1)
    elif data_shape_size == 1:
        raise ValueError(
            "retrieve_seq_length_op3: data has wrong shape! Shape got %s" % data.get_shape().as_list()
        )
    else:
        raise ValueError(
            "retrieve_seq_length_op3: handling data with num of dims %s hasn't been implemented!" % (data_shape_size)
        )
def padded_sequence_accuracy(logits, labels):
    """Percentage of times that predictions match labels everywhere (non-0)."""
    with tf.variable_scope("padded_sequence_accuracy", values=[logits, labels]):
        logits, labels = _pad_tensors_to_same_length(logits, labels)
        weights = tf.to_float(tf.not_equal(labels, 0))
        outputs = tf.to_int32(tf.argmax(logits, axis=-1))
        padded_labels = tf.to_int32(labels)
        not_correct = tf.to_float(tf.not_equal(outputs, padded_labels)) * weights
        axis = list(range(1, len(outputs.get_shape())))
        correct_seq = 1.0 - tf.minimum(1.0, tf.reduce_sum(not_correct, axis=axis))
        return correct_seq, tf.constant(1.0)
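# A tiny worked example of the sequence-accuracy idea above (a sketch; it
# inlines the masking rather than calling the function, since
# _pad_tensors_to_same_length is an internal helper). Id 0 is padding, so a
# mismatch at a padded position does not count against the sequence.
import tensorflow as tf

seq_labels = tf.constant([[3, 5, 0, 0]])    # sequence of length 2, padded with 0
seq_outputs = tf.constant([[3, 5, 9, 9]])   # predictions; wrong only on padding
seq_weights = tf.to_float(tf.not_equal(seq_labels, 0))
seq_not_correct = tf.to_float(tf.not_equal(seq_outputs, seq_labels)) * seq_weights
seq_correct = 1.0 - tf.minimum(1.0, tf.reduce_sum(seq_not_correct, axis=-1))
# seq_correct evaluates to [1.0]: the sequence counts as fully correct.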
def target_mask_op(data, pad_val=0):  # HangSheng: return tensor for mask, if input is tf.string
    """Return tensor for mask, if input is ``tf.string``."""
    data_shape_size = data.get_shape().ndims
    if data_shape_size == 3:
        return tf.cast(tf.reduce_any(input_tensor=tf.not_equal(data, pad_val), axis=2), dtype=tf.int32)
    elif data_shape_size == 2:
        return tf.cast(tf.not_equal(data, pad_val), dtype=tf.int32)
    elif data_shape_size == 1:
        raise ValueError("target_mask_op: data has wrong shape!")
    else:
        raise ValueError("target_mask_op: handling data_shape_size %s hasn't been implemented!" % (data_shape_size))
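# A quick sketch of the resulting mask (assuming a 2-D int batch padded with 0):
import tensorflow as tf

demo_data = tf.constant([[1, 2, 0, 0],
                         [4, 5, 6, 0]])
demo_mask = target_mask_op(demo_data)  # -> [[1, 1, 0, 0], [1, 1, 1, 0]]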
def compute_error(self):
    # Sets mask variables and performs batch processing
    self.batch_gold_select = self.batch_print_answer > 0.0
    self.full_column_mask = tf.concat(
        axis=1, values=[self.batch_number_column_mask, self.batch_word_column_mask])
    self.full_processed_column = tf.concat(
        axis=1, values=[self.batch_processed_number_column, self.batch_processed_word_column])
    self.full_processed_sorted_index_column = tf.concat(axis=1, values=[
        self.batch_processed_sorted_index_number_column,
        self.batch_processed_sorted_index_word_column
    ])
    self.select_bad_number_mask = tf.cast(
        tf.logical_and(
            tf.not_equal(self.full_processed_column, self.utility.FLAGS.pad_int),
            tf.not_equal(self.full_processed_column, self.utility.FLAGS.bad_number_pre_process)),
        self.data_type)
    self.select_mask = tf.cast(
        tf.logical_not(
            tf.equal(self.batch_number_column, self.utility.FLAGS.pad_int)), self.data_type)
    self.select_word_mask = tf.cast(
        tf.logical_not(
            tf.equal(self.batch_word_column_entry_mask, self.utility.dummy_token_id)), self.data_type)
    self.select_full_mask = tf.concat(
        axis=1, values=[self.select_mask, self.select_word_mask])
    self.select_whole_mask = tf.maximum(
        tf.reshape(
            tf.slice(self.select_mask, [0, 0, 0], [self.batch_size, 1, self.max_elements]),
            [self.batch_size, self.max_elements]),
        tf.reshape(
            tf.slice(self.select_word_mask, [0, 0, 0], [self.batch_size, 1, self.max_elements]),
            [self.batch_size, self.max_elements]))
    self.invert_select_full_mask = tf.cast(
        tf.concat(axis=1, values=[
            tf.equal(self.batch_number_column, self.utility.FLAGS.pad_int),
            tf.equal(self.batch_word_column_entry_mask, self.utility.dummy_token_id)
        ]), self.data_type)
    self.batch_lookup_answer = tf.zeros(tf.shape(self.batch_gold_select))
    self.reset_select = self.select_whole_mask
    self.rows = tf.reduce_sum(self.select_whole_mask, 1)
    self.num_entries = tf.reshape(
        tf.reduce_sum(tf.reduce_sum(self.select_full_mask, 1), 1), [self.batch_size])
    self.final_error, self.final_correct = self.batch_process()
    return self.final_error
def add_embedding(self):
    # embed = np.load('glove{0}_uniform.npy'.format(self.emb_dim))
    with tf.variable_scope("Embed", regularizer=None):
        embedding = tf.get_variable('embedding', [self.num_emb, self.emb_dim],
                                    initializer=tf.random_uniform_initializer(-0.05, 0.05),
                                    trainable=True, regularizer=None)
        # Remap the padding id (-1) to 0 so the lookup stays in range.
        ix = tf.to_int32(tf.not_equal(self.input, -1)) * self.input
        emb_tree = tf.nn.embedding_lookup(embedding, ix)
        # Zero out the embeddings of padded positions.
        emb_tree = emb_tree * (tf.expand_dims(tf.to_float(tf.not_equal(self.input, -1)), 2))
        return emb_tree
def add_placeholders(self):
    dim2 = self.config.maxnodesize
    dim1 = self.config.batch_size
    self.input = tf.placeholder(tf.int32, [dim1, dim2], name='input')
    self.treestr = tf.placeholder(tf.int32, [dim1, dim2, 2], name='tree')
    self.labels = tf.placeholder(tf.int32, [dim1, dim2], name='labels')
    self.dropout = tf.placeholder(tf.float32, name='dropout')
    self.n_inodes = tf.reduce_sum(tf.to_int32(tf.not_equal(self.treestr, -1)), [1, 2])
    # Each internal node contributes two child entries, so halve the count
    # (integer division keeps the dtype integral under Python 3).
    self.n_inodes = self.n_inodes // 2
    self.num_leaves = tf.reduce_sum(tf.to_int32(tf.not_equal(self.input, -1)), [1])
    self.batch_len = tf.placeholder(tf.int32, name="batch_len")
def add_embedding(self):
    # embed = np.load('glove{0}_uniform.npy'.format(self.emb_dim))
    with tf.device('/cpu:0'):
        with tf.variable_scope("Embed"):
            embedding = tf.get_variable('embedding', [self.num_emb, self.emb_dim],
                                        initializer=tf.random_uniform_initializer(-0.05, 0.05),
                                        trainable=True,
                                        regularizer=tf.contrib.layers.l2_regularizer(0.0))
            # Remap the padding id (-1) to 0 so the lookup stays in range.
            ix = tf.to_int32(tf.not_equal(self.input, -1)) * self.input
            emb = tf.nn.embedding_lookup(embedding, ix)
            # Zero out the embeddings of padded positions.
            emb = emb * tf.to_float(tf.not_equal(tf.expand_dims(self.input, 2), -1))
            return emb
def _add_rpn_losses(self, sigma_rpn=3.0):
    with tf.variable_scope('loss_' + self._tag) as scope:
        # RPN, class loss
        rpn_cls_score = tf.reshape(self._predictions['rpn_cls_score_reshape'], [-1, 2])
        rpn_label = tf.reshape(self._anchor_targets['rpn_labels'], [-1])
        rpn_select = tf.where(tf.not_equal(rpn_label, -1))
        rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score, rpn_select), [-1, 2])
        rpn_label = tf.reshape(tf.gather(rpn_label, rpn_select), [-1])
        rpn_cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score, labels=rpn_label))

        # RPN, bbox loss
        rpn_bbox_pred = self._predictions['rpn_bbox_pred']
        rpn_bbox_targets = self._anchor_targets['rpn_bbox_targets']
        rpn_bbox_inside_weights = self._anchor_targets['rpn_bbox_inside_weights']
        rpn_bbox_outside_weights = self._anchor_targets['rpn_bbox_outside_weights']
        rpn_loss_box = self._smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets,
                                            rpn_bbox_inside_weights, rpn_bbox_outside_weights,
                                            sigma=sigma_rpn, dim=[1, 2, 3])

        self._losses['rpn_cross_entropy'] = rpn_cross_entropy
        self._losses['rpn_loss_box'] = rpn_loss_box
        self._losses['rpn_loss'] = rpn_loss_box + rpn_cross_entropy

        self._event_summaries.update(self._losses)

    return self._losses['rpn_loss']
def char_accuracy(predictions, targets, rej_char, streaming=False):
    """Computes character-level accuracy.

    Both predictions and targets should have the same shape
    [batch_size x seq_length].

    Args:
      predictions: predicted character ids.
      targets: ground truth character ids.
      rej_char: the character id used to mark an empty element (end of sequence).
      streaming: if True, uses the streaming mean from the slim.metrics module.

    Returns:
      an update op for execution and a value tensor whose value on evaluation
      returns the total character accuracy.
    """
    with tf.variable_scope('CharAccuracy'):
        predictions.get_shape().assert_is_compatible_with(targets.get_shape())

        targets = tf.to_int32(targets)
        const_rej_char = tf.constant(rej_char, shape=targets.get_shape())
        weights = tf.to_float(tf.not_equal(targets, const_rej_char))
        correct_chars = tf.to_float(tf.equal(predictions, targets))
        accuracy_per_example = tf.div(
            tf.reduce_sum(tf.multiply(correct_chars, weights), 1),
            tf.reduce_sum(weights, 1))
        if streaming:
            return tf.contrib.metrics.streaming_mean(accuracy_per_example)
        else:
            return tf.reduce_mean(accuracy_per_example)
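# Hypothetical usage sketch: two sequences of length 4, with id 0 marking
# empty/rejected positions (rej_char=0). Only non-rejected positions count.
import tensorflow as tf

char_preds = tf.constant([[1, 2, 3, 0], [4, 4, 0, 0]], dtype=tf.int32)
char_targets = tf.constant([[1, 2, 9, 0], [4, 5, 0, 0]], dtype=tf.int32)
char_acc = char_accuracy(char_preds, char_targets, rej_char=0)
# per-example accuracies are [2/3, 1/2], so char_acc evaluates to ~0.583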
def build_loss(self, ohem=False):
    # classification loss
    rpn_cls_score = tf.reshape(self.get_output('rpn_cls_score_reshape'), [-1, 2])  # shape (HxWxA, 2)
    rpn_label = tf.reshape(self.get_output('rpn-data')[0], [-1])  # shape (HxWxA)
    # ignore_label(-1)
    fg_keep = tf.equal(rpn_label, 1)
    rpn_keep = tf.where(tf.not_equal(rpn_label, -1))
    rpn_cls_score = tf.gather(rpn_cls_score, rpn_keep)  # shape (N, 2)
    rpn_label = tf.gather(rpn_label, rpn_keep)
    rpn_cross_entropy_n = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=rpn_label, logits=rpn_cls_score)

    # box loss
    rpn_bbox_pred = self.get_output('rpn_bbox_pred')  # shape (1, H, W, Ax4)
    rpn_bbox_targets = self.get_output('rpn-data')[1]
    rpn_bbox_inside_weights = self.get_output('rpn-data')[2]
    rpn_bbox_outside_weights = self.get_output('rpn-data')[3]
    rpn_bbox_pred = tf.gather(tf.reshape(rpn_bbox_pred, [-1, 4]), rpn_keep)  # shape (N, 4)
    rpn_bbox_targets = tf.gather(tf.reshape(rpn_bbox_targets, [-1, 4]), rpn_keep)
    rpn_bbox_inside_weights = tf.gather(tf.reshape(rpn_bbox_inside_weights, [-1, 4]), rpn_keep)
    rpn_bbox_outside_weights = tf.gather(tf.reshape(rpn_bbox_outside_weights, [-1, 4]), rpn_keep)

    rpn_loss_box_n = tf.reduce_sum(rpn_bbox_outside_weights * self.smooth_l1_dist(
        rpn_bbox_inside_weights * (rpn_bbox_pred - rpn_bbox_targets)), reduction_indices=[1])

    rpn_loss_box = tf.reduce_sum(rpn_loss_box_n) / (tf.reduce_sum(tf.cast(fg_keep, tf.float32)) + 1)
    rpn_cross_entropy = tf.reduce_mean(rpn_cross_entropy_n)

    model_loss = rpn_cross_entropy + rpn_loss_box
    regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    total_loss = tf.add_n(regularization_losses) + model_loss

    return total_loss, model_loss, rpn_cross_entropy, rpn_loss_box
def dynamic_decode_and_search(self,
                              embedding,
                              start_tokens,
                              end_token,
                              vocab_size,
                              initial_state=None,
                              beam_width=5,
                              length_penalty=0.0,
                              maximum_iterations=250,
                              mode=tf.estimator.ModeKeys.PREDICT,
                              memory=None,
                              memory_sequence_length=None,
                              dtype=None):
    cache = self._init_cache(memory, memory_sequence_length=memory_sequence_length)
    symbols_to_logits_fn = self._symbols_to_logits_fn(embedding, vocab_size, mode)

    outputs, log_probs = beam_search(
        symbols_to_logits_fn,
        start_tokens,
        beam_width,
        maximum_iterations,
        vocab_size,
        length_penalty,
        states=cache,
        eos_id=end_token)
    outputs = tf.slice(outputs, [0, 0, 1], [-1, -1, -1])  # Ignore <s>.

    lengths = tf.not_equal(outputs, 0)
    lengths = tf.cast(lengths, tf.int32)
    lengths = tf.reduce_sum(lengths, axis=-1)

    return (outputs, None, lengths, log_probs)
def loss(logits, labels):
    """Add L2Loss to all the trainable variables.

    Add summary for "Loss" and "Loss/avg".

    Args:
      logits: Logits from inference().
      labels: Labels from distorted_inputs or inputs(). 3-D tensor of shape
        [batch_size, IMAGE_SIZE, IMAGE_SIZE]

    Returns:
      Loss tensor of type float.
    """
    labels = tf.cast(labels, tf.int64)
    label_shape = labels.get_shape().as_list()
    reshaped_labels = tf.reshape(labels,
                                 [label_shape[0] * label_shape[1] * label_shape[2]])
    print(reshaped_labels.get_shape())
    logits_shape = logits.get_shape().as_list()
    reshaped_logits = tf.reshape(logits,
                                 [logits_shape[0] * logits_shape[1] * logits_shape[2],
                                  logits_shape[3]])
    # Keyword arguments are required by tf.nn.sparse_softmax_cross_entropy_with_logits.
    cross_entropy_per_pixel = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=reshaped_logits, labels=reshaped_labels, name='cross_entropy_per_pixel')
    # Ignore pixels labeled -1 when averaging the loss.
    no_loss_mask = tf.not_equal(reshaped_labels, -1)
    filtered_cross_entropy = tf.boolean_mask(cross_entropy_per_pixel,
                                             no_loss_mask,
                                             name='no_loss_mask')
    cross_entropy_mean = tf.reduce_mean(filtered_cross_entropy, name='cross_entropy')
    # cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)
    return tf.add_n(tf.get_collection('losses'), name='total_loss')
def while_exit_cond(result, logits, loss):  # pylint: disable=unused-argument
    """Exit the loop either if we reach decode_length or EOS."""
    length = common_layers.shape_list(result)[1]

    not_overflow = length < decode_length

    if self._problem_hparams.stop_at_eos:

        def fn_not_eos():
            # Check if the last predicted element is an EOS.
            return tf.not_equal(
                tf.squeeze(result[:, -1, :, :]), text_encoder.EOS_ID)

        not_eos = tf.cond(
            # We only check for early stopping if there is at least 1 element
            # (otherwise not_eos will crash).
            tf.not_equal(length, 0),
            fn_not_eos,
            lambda: True,
        )

        return tf.cond(
            tf.equal(batch_size, 1),
            # If batch_size == 1, we check EOS for early stopping.
            lambda: tf.logical_and(not_overflow, not_eos),
            # Else, just wait for max length.
            lambda: not_overflow)
    return not_overflow
def compute_loss(self, emb_batch, curr_batch_size=None):
    outloss = []
    prediction = []
    for idx_batch in range(self.config.batch_size):
        tree_states = self.compute_states(emb_batch, idx_batch)
        logits = self.create_output(tree_states)

        labels1 = tf.gather(self.labels, idx_batch)
        labels2 = tf.reduce_sum(tf.to_int32(tf.not_equal(labels1, -1)))
        labels = tf.gather(labels1, tf.range(labels2))
        loss = self.calc_loss(logits, labels)

        pred = tf.nn.softmax(logits)
        pred_root = tf.gather(pred, labels2 - 1)

        prediction.append(pred_root)
        outloss.append(loss)

    # tf.pack was renamed to tf.stack in TF 1.0.
    batch_loss = tf.stack(outloss)
    self.pred = tf.stack(prediction)

    return batch_loss
def get_mask(gt, num_classes, ignore_label):
    less_equal_class = tf.less_equal(gt, num_classes - 1)
    not_equal_ignore = tf.not_equal(gt, ignore_label)
    mask = tf.logical_and(less_equal_class, not_equal_ignore)
    indices = tf.squeeze(tf.where(mask), 1)
    return indices
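# Illustrative use (an assumption: gt is a flattened label map; with
# num_classes=3 and ignore_label=255, indices keeps positions whose label
# falls in [0, 2]):
import tensorflow as tf

gt_demo = tf.constant([0, 2, 255, 1, 3], dtype=tf.int32)
keep = get_mask(gt_demo, num_classes=3, ignore_label=255)  # -> [0, 1, 3]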
def measure():
    E = tf.reduce_mean(energy(layers))
    C = tf.reduce_mean(cost(layers))
    y_prediction = tf.argmax(layers[-1], 1)
    error = tf.reduce_mean(tf.cast(tf.not_equal(y_prediction, tf.cast(y, tf.int64)), tf.float32))
    return E, C, error
def padded_sequence_accuracy(predictions,
                             labels,
                             weights_fn=common_layers.weights_nonzero):
    """Percentage of times that predictions match labels everywhere (non-0)."""
    # If the last dimension is 1 then we're using L1/L2 loss.
    if common_layers.shape_list(predictions)[-1] == 1:
        return rounding_sequence_accuracy(
            predictions, labels, weights_fn=weights_fn)
    with tf.variable_scope(
            "padded_sequence_accuracy", values=[predictions, labels]):
        padded_predictions, padded_labels = common_layers.pad_with_zeros(
            predictions, labels)
        weights = weights_fn(padded_labels)

        # Flatten, keeping batch dim (and num_classes dim for predictions).
        # TPU argmax can only deal with a limited number of dimensions.
        predictions_shape = common_layers.shape_list(padded_predictions)
        batch_size = predictions_shape[0]
        num_classes = predictions_shape[-1]
        flat_size = common_layers.list_product(
            common_layers.shape_list(padded_labels)[1:])
        padded_predictions = tf.reshape(
            padded_predictions,
            [batch_size, common_layers.list_product(predictions_shape[1:-1]),
             num_classes])
        padded_labels = tf.reshape(padded_labels, [batch_size, flat_size])
        weights = tf.reshape(weights, [batch_size, flat_size])

        outputs = tf.to_int32(tf.argmax(padded_predictions, axis=-1))
        padded_labels = tf.to_int32(padded_labels)
        not_correct = tf.to_float(tf.not_equal(outputs, padded_labels)) * weights
        axis = list(range(1, len(outputs.get_shape())))
        correct_seq = 1.0 - tf.minimum(1.0, tf.reduce_sum(not_correct, axis=axis))
        return correct_seq, tf.constant(1.0)
def error(self):
    if self.num_out != 1:
        max_labels = tf.argmax(self.y, 1)
    else:
        max_labels = self.y
    # Cast both sides to float so the comparison dtypes match
    # (tf.argmax returns int64, which cannot be compared to a float tensor).
    mistakes = tf.not_equal(tf.to_float(max_labels),
                            tf.to_float(tf.argmax(self.prediction, 1)))
    return tf.reduce_mean(tf.cast(mistakes, tf.float32))
def classification_costs(logits, labels, name=None):
    """Compute classification cost mean and classification cost per sample.

    Assume unlabeled examples have label == -1. For unlabeled examples, cost == 0.
    Compute the mean over all examples.
    Note that unlabeled examples are treated differently in error calculation.
    """
    with tf.name_scope(name, "classification_costs") as scope:
        applicable = tf.not_equal(labels, -1)

        # Change -1s to zeros to make cross-entropy computable
        labels = tf.where(applicable, labels, tf.zeros_like(labels))

        # This will now have incorrect values for unlabeled examples
        per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)

        # Retain costs only for labeled
        per_sample = tf.where(applicable, per_sample, tf.zeros_like(per_sample))

        # Take mean over all examples, not just labeled examples.
        labeled_sum = tf.reduce_sum(per_sample)
        total_count = tf.to_float(tf.shape(per_sample)[0])
        mean = tf.div(labeled_sum, total_count, name=scope)

        return mean, per_sample
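# Sketch of the semi-supervised convention above: label -1 marks an unlabeled
# example, which contributes zero cost but still appears in the denominator.
import tensorflow as tf

ssl_logits = tf.constant([[2.0, 0.0], [0.0, 2.0], [1.0, 1.0]])
ssl_labels = tf.constant([0, 1, -1])
ssl_mean, ssl_per_sample = classification_costs(ssl_logits, ssl_labels)
# ssl_per_sample[2] is exactly 0; ssl_mean divides the labeled losses by 3, not 2.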
def rmse(self, vp):
    """Root Mean Square Error.

    Note that this needs to be evaluated on the rated items only.

    Args:
        vp (tensor, float32): inferred output (network prediction)

    Returns:
        err (tensor, float32): root mean square error
    """
    with tf.name_scope("re"):
        mask = tf.not_equal(self.v, 0)  # selects only the rated items
        n_values = tf.reduce_sum(
            tf.cast(mask, "float32"), axis=1
        )  # number of rated items

        # evaluate the square difference between the inferred and the input data on the rated items
        e = tf.where(
            mask, x=tf.squared_difference(self.v, vp), y=tf.zeros_like(self.v)
        )

        # evaluate the msre
        err = tf.sqrt(
            tf.reduce_mean(tf.div(tf.reduce_sum(e, axis=1), n_values)) / 2
        )

    return err
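# Masked-RMSE intuition in isolation (a standalone sketch, not the class
# method): only the nonzero "rated" entries of v enter the error.
import tensorflow as tf

v_demo = tf.constant([[5.0, 0.0, 3.0]])   # 0 = unrated
vp_demo = tf.constant([[4.0, 9.9, 3.0]])  # prediction on the unrated item is ignored
rated = tf.not_equal(v_demo, 0)
n_rated = tf.reduce_sum(tf.cast(rated, tf.float32), axis=1)  # 2 rated items
sq_err = tf.where(rated, tf.squared_difference(v_demo, vp_demo), tf.zeros_like(v_demo))
rmse_demo = tf.sqrt(tf.reduce_mean(tf.div(tf.reduce_sum(sq_err, axis=1), n_rated)) / 2)
# rmse_demo = sqrt((1/2) / 2) = 0.5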
def call(self, x):
    """Get token embeddings of x.

    Args:
      x: An int64 tensor with shape [batch_size, length]

    Returns:
      embeddings: float32 tensor with shape [batch_size, length, embedding_size]
      padding: float32 tensor with shape [batch_size, length] indicating the
        locations of the padding tokens in x.
    """
    with tf.name_scope("embedding"):
        # Create binary mask of size [batch_size, length]
        mask = tf.to_float(tf.not_equal(x, 0))

        if self.method == "gather":
            embeddings = tf.gather(self.shared_weights, x)
        else:  # matmul
            embeddings = tpu_utils.embedding_matmul(
                embedding_table=self.shared_weights,
                values=tf.cast(x, dtype=tf.int32),
                mask=mask
            )

        embeddings *= tf.expand_dims(mask, -1)

        # Scale embedding by the sqrt of the hidden size
        embeddings *= self.hidden_size ** 0.5

        return embeddings
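# The padding trick above in miniature (a standalone sketch): padded positions
# (id 0) are zeroed in the output so they carry no signal downstream, and the
# result is scaled by sqrt(hidden_size) as in the Transformer convention.
import tensorflow as tf

emb_table = tf.random_normal([10, 4])          # vocab 10, hidden 4
token_ids = tf.constant([[7, 3, 0, 0]])        # two real tokens, two pads
pad_mask = tf.to_float(tf.not_equal(token_ids, 0))  # [[1, 1, 0, 0]]
emb_demo = tf.gather(emb_table, token_ids) * tf.expand_dims(pad_mask, -1)
emb_demo *= 4 ** 0.5                           # scale by sqrt(hidden_size)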
def train_speech_to_text_network():
    logit = speech_to_text_network()

    # CTC loss: build a sparse tensor from the nonzero (non-padding) label
    # positions; label ids are shifted down by 1 so that 0 can act as padding.
    indices = tf.where(tf.not_equal(tf.cast(Y, tf.float32), 0.))
    target = tf.SparseTensor(indices=indices,
                             values=tf.gather_nd(Y, indices) - 1,
                             dense_shape=tf.cast(tf.shape(Y), tf.int64))
    loss = tf.nn.ctc_loss(labels=target, inputs=logit,
                          sequence_length=sequence_len, time_major=False)
    # optimizer
    lr = tf.Variable(0.001, dtype=tf.float32, trainable=False)
    optimizer = MaxPropOptimizer(learning_rate=lr, beta2=0.99)
    var_list = [t for t in tf.trainable_variables()]
    gradient = optimizer.compute_gradients(loss, var_list=var_list)
    optimizer_op = optimizer.apply_gradients(gradient)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver(tf.global_variables())

        for epoch in range(16):
            sess.run(tf.assign(lr, 0.001 * (0.97 ** epoch)))

            global pointer
            pointer = 0
            for batch in range(n_batch):
                batches_wavs, batches_labels = get_next_batches(batch_size)
                train_loss, _ = sess.run([loss, optimizer_op],
                                         feed_dict={X: batches_wavs, Y: batches_labels})
                print(epoch, batch, train_loss)
            if epoch % 5 == 0:
                saver.save(sess, 'speech.module', global_step=epoch)
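# How the dense-to-sparse label conversion above behaves, in isolation (a
# sketch; label ids here are shifted by +1 so that 0 can serve as padding,
# matching the `- 1` applied to the values):
import tensorflow as tf

Y_demo = tf.constant([[3, 1, 0], [2, 0, 0]], dtype=tf.int32)
idx = tf.where(tf.not_equal(tf.cast(Y_demo, tf.float32), 0.))
sparse_demo = tf.SparseTensor(indices=idx,
                              values=tf.gather_nd(Y_demo, idx) - 1,
                              dense_shape=tf.cast(tf.shape(Y_demo), tf.int64))
# sparse_demo.values -> [2, 0, 1]; padding positions are absent from the tensor.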
def fast_rcnn_minibatch(self, reference_boxes):
    with tf.variable_scope('fast_rcnn_minibatch'):

        reference_boxes_mattached_gtboxes, object_mask, label = \
            self.fast_rcnn_find_positive_negative_samples(reference_boxes)

        positive_indices = tf.reshape(tf.where(tf.not_equal(object_mask, 0.)), [-1])

        num_of_positives = tf.minimum(tf.shape(positive_indices)[0],
                                      tf.cast(self.fast_rcnn_minibatch_size * self.fast_rcnn_positives_ratio,
                                              tf.int32))

        positive_indices = tf.random_shuffle(positive_indices)
        positive_indices = tf.slice(positive_indices, begin=[0], size=[num_of_positives])

        negative_indices = tf.reshape(tf.where(tf.equal(object_mask, 0.)), [-1])
        num_of_negatives = tf.minimum(tf.shape(negative_indices)[0],
                                      self.fast_rcnn_minibatch_size - num_of_positives)

        negative_indices = tf.random_shuffle(negative_indices)
        negative_indices = tf.slice(negative_indices, begin=[0], size=[num_of_negatives])

        minibatch_indices = tf.concat([positive_indices, negative_indices], axis=0)
        minibatch_indices = tf.random_shuffle(minibatch_indices)

        minibatch_reference_boxes_mattached_gtboxes = tf.gather(reference_boxes_mattached_gtboxes,
                                                                minibatch_indices)
        object_mask = tf.gather(object_mask, minibatch_indices)
        label = tf.gather(label, minibatch_indices)
        label_one_hot = tf.one_hot(label, self.num_classes + 1)

        return minibatch_indices, minibatch_reference_boxes_mattached_gtboxes, object_mask, label_one_hot
def loss(self, logits, labels, regularization):
    """Adds to the inference model the layers required to generate loss."""
    with tf.name_scope('loss'):
        with tf.name_scope('var_loss'):
            labels = tf.cast(labels, tf.float32)
            shape = labels.get_shape()

            same_class = tf.boolean_mask(logits, tf.equal(labels, tf.ones(shape)))
            diff_class = tf.boolean_mask(logits, tf.not_equal(labels, tf.ones(shape)))
            same_mean, same_var = tf.nn.moments(same_class, [0])
            diff_mean, diff_var = tf.nn.moments(diff_class, [0])
            var_loss = same_var + diff_var

        with tf.name_scope('mean_loss'):
            mean_loss = self.lamda * tf.where(
                tf.greater(self.mu - (same_mean - diff_mean), 0),
                self.mu - (same_mean - diff_mean), 0)

        with tf.name_scope('regularization'):
            regularization *= tf.add_n(self.regularizers)

        loss = var_loss + mean_loss + regularization

        # Summaries for TensorBoard.
        tf.summary.scalar('loss/total', loss)
        with tf.name_scope('averages'):
            averages = tf.train.ExponentialMovingAverage(0.9)
            op_averages = averages.apply([var_loss, mean_loss, regularization, loss])
            tf.summary.scalar('loss/avg/var_loss', averages.average(var_loss))
            tf.summary.scalar('loss/avg/mean_loss', averages.average(mean_loss))
            tf.summary.scalar('loss/avg/regularization', averages.average(regularization))
            tf.summary.scalar('loss/avg/total', averages.average(loss))
            with tf.control_dependencies([op_averages]):
                loss_average = tf.identity(averages.average(loss), name='control')
        return loss, loss_average
def _add_losses_ohem_nms(self, sigma_rpn=3.0):
    with tf.variable_scope('loss_' + self._tag) as scope:
        # RPN, class loss
        rpn_cls_score = tf.reshape(self._predictions['rpn_cls_score_reshape'], [-1, 2])
        rpn_label = tf.reshape(self._anchor_targets['rpn_labels'], [-1])
        rpn_select = tf.where(tf.not_equal(rpn_label, -1))
        rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score, rpn_select), [-1, 2])
        rpn_label = tf.reshape(tf.gather(rpn_label, rpn_select), [-1])
        rpn_cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score, labels=rpn_label))

        # RPN, bbox loss
        rpn_bbox_pred = self._predictions['rpn_bbox_pred']
        rpn_bbox_targets = self._anchor_targets['rpn_bbox_targets']
        rpn_bbox_inside_weights = self._anchor_targets['rpn_bbox_inside_weights']
        rpn_bbox_outside_weights = self._anchor_targets['rpn_bbox_outside_weights']
        rpn_loss_box = self._smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets,
                                            rpn_bbox_inside_weights, rpn_bbox_outside_weights,
                                            sigma=sigma_rpn, dim=[1, 2, 3])

        # RCNN, class loss
        cls_score = self._predictions["cls_score"]
        label = tf.reshape(self._proposal_targets["labels"], [-1])
        rfcn_cls_score = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=tf.reshape(cls_score, [-1, self._num_classes]), labels=label)

        # RCNN, bbox loss
        bbox_pred = self._predictions['bbox_pred']
        bbox_targets = self._proposal_targets['bbox_targets']
        bbox_inside_weights = self._proposal_targets['bbox_inside_weights']
        bbox_outside_weights = self._proposal_targets['bbox_outside_weights']
        loss_box_vector = self._smooth_l1_loss_vector(bbox_pred, bbox_targets,
                                                      bbox_inside_weights, bbox_outside_weights)

        # OHEM: use NMS with the per-RoI loss as the score, keeping up to
        # cfg.TRAIN.OHEM_B of the highest-loss, non-overlapping RoIs.
        rois_boxes = self._proposal_targets['rois']
        loss_before_nms = rfcn_cls_score + loss_box_vector
        ohem_indexes = tf.image.non_max_suppression(rois_boxes[:, 1:5], loss_before_nms,
                                                    cfg.TRAIN.OHEM_B, cfg.TRAIN.OHEM_NMS_THRESH)
        rfcn_cls_score = tf.gather(rfcn_cls_score, ohem_indexes)
        loss_box_vector = tf.gather(loss_box_vector, ohem_indexes)
        cross_entropy = tf.reduce_mean(rfcn_cls_score)
        loss_box = tf.reduce_mean(loss_box_vector)

        self._losses['cross_entropy'] = cross_entropy
        self._losses['loss_box'] = loss_box
        self._losses['rpn_cross_entropy'] = rpn_cross_entropy
        self._losses['rpn_loss_box'] = rpn_loss_box
        self._losses['rpn_loss'] = rpn_loss_box + rpn_cross_entropy
        self._losses['class_loss'] = cross_entropy + loss_box
        loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box
        self._losses['total_loss'] = loss
        self._losses['ohem_indexes_counts'] = ohem_indexes

        self._event_summaries.update(self._losses)

    return loss
def padded_accuracy(logits, labels):
    """Percentage of times that predictions match labels on non-0s."""
    with tf.variable_scope("padded_accuracy", values=[logits, labels]):
        logits, labels = _pad_tensors_to_same_length(logits, labels)
        weights = tf.to_float(tf.not_equal(labels, 0))
        outputs = tf.to_int32(tf.argmax(logits, axis=-1))
        padded_labels = tf.to_int32(labels)
        return tf.to_float(tf.equal(outputs, padded_labels)), weights
def errors(self, y):
    if len(y.get_shape()) != len(self.y_pred.get_shape()):
        raise TypeError('y should have the same shape as self.y_pred',
                        ('y', y.type, 'y_pred', self.y_pred.type))
    if y.dtype in [tf.int32, tf.int16, tf.int8]:
        # Cast the boolean mismatches to float before averaging;
        # tf.reduce_mean is not defined for bool tensors.
        return tf.reduce_mean(tf.cast(tf.not_equal(self.y_pred, y), tf.float32))
    else:
        raise NotImplementedError()
def errors(logits, labels, name=None):
    """Compute error mean and whether each labeled example is erroneous.

    Assume unlabeled examples have label == -1.
    Compute the mean error over labeled examples.
    Mean error is NaN if there are no labeled examples.
    Note that unlabeled examples are treated differently in cost calculation.
    """
    with tf.name_scope(name, "errors") as scope:
        applicable = tf.not_equal(labels, -1)
        labels = tf.boolean_mask(labels, applicable)
        logits = tf.boolean_mask(logits, applicable)
        predictions = tf.argmax(logits, -1)
        labels = tf.cast(labels, tf.int64)
        per_sample = tf.to_float(tf.not_equal(predictions, labels))
        mean = tf.reduce_mean(per_sample, name=scope)
        return mean, per_sample
def _append_eow(self, sequences):
    """Append the EOW character after the end of every given sequence."""
    sequences_rev = tf.reverse_sequence(
        sequences,
        tf.reduce_sum(tf.cast(tf.not_equal(sequences, 0), tf.int32), axis=1), 1)
    sequences_rev_eow = tf.pad(sequences_rev, [[0, 0], [1, 0]],
                               constant_values=MorphoDataset.Factor.EOW)
    return tf.reverse_sequence(
        sequences_rev_eow,
        tf.reduce_sum(tf.cast(tf.not_equal(sequences_rev_eow, 0), tf.int32), axis=1), 1)
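# The reverse/pad/reverse trick above, traced step by step on one toy batch
# (assumptions: 0 is the padding id and EOW is some reserved id, say 9):
#   [[5, 7, 0]]  --reverse by true length-->   [[7, 5, 0]]
#                --pad EOW on the left-->      [[9, 7, 5, 0]]
#                --reverse back by new length--> [[5, 7, 9, 0]]
# i.e. the EOW lands right after the last real token, before the padding.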
def step(self, time, inputs, state, name=None):
    """Perform a decoding step.

    Args:
      time: scalar `int32` tensor.
      inputs: A (structure of) input tensors.
      state: A (structure of) state tensors and TensorArrays.
      name: Name scope for any created operations.

    Returns:
      `(outputs, next_state, next_inputs, finished)`.
    """
    batch_size = self._batch_size
    beam_width = self._beam_width
    end_token = self._end_token
    length_penalty_weight = self._length_penalty_weight

    with ops.name_scope(name, "BeamSearchDecoderStep", (time, inputs, state)):
        cell_state = state.cell_state
        inputs = nest.map_structure(
            lambda inp: self._merge_batch_beams(inp, s=inp.shape[2:]), inputs)
        cell_state = nest.map_structure(self._maybe_merge_batch_beams, cell_state,
                                        self._cell.state_size)
        cell_outputs, next_cell_state = self._cell(inputs, cell_state)
        # finished = tf.Print(state.finished, [state.finished, 'finished', time], summarize=100)
        # not_finished = tf.Print(not_finished, [not_finished, 'not_finished', time], summarize=100)

        # cell_state.last_choice shape = [batch_size * beam_width]
        next_choices = gen_array_ops.gather_v2(self.lookup_table,
                                               cell_state.last_choice, axis=0)
        not_finished = tf.not_equal(next_choices[:, 0], end_token)
        next_next_choices = gen_array_ops.gather_v2(self.lookup_table,
                                                    next_choices[:, 0], axis=0)
        will_finish = tf.logical_and(
            not_finished, tf.equal(next_next_choices[:, 0], end_token))

        def move(will_finish, last_choice, cell_outputs):
            # cell_outputs = tf.Print(cell_outputs, [cell_outputs, 'cell_outputs', time], summarize=1000)
            # will_finish = tf.Print(will_finish, [will_finish, 'will_finish', time], summarize=100)
            attention_score = self._step_method(last_choice)
            attention_score = attention_score + cell_outputs
            # final = tf.Print(final, [final, 'finalll', time], summarize=1000)
            return tf.where(will_finish, attention_score, cell_outputs)

        if self._output_layer is not None:
            cell_outputs = self._output_layer(cell_outputs)

        # will_finish = tf.Print(will_finish, [will_finish, 'will_finish, beam_search', time], summarize=100)
        cell_outputs = tf.cond(
            tf.reduce_any(will_finish),
            false_fn=lambda: cell_outputs,
            true_fn=lambda: move(will_finish, cell_state.last_choice, cell_outputs))

        if self.hie:
            cell_outputs = self._mask_outputs_by_lable(cell_outputs,
                                                       cell_state.last_choice)

        # cell_state.last_choice shape = [batch_size * beam_width,]
        cell_outputs = nest.map_structure(
            lambda out: self._split_batch_beams(out, out.shape[1:]), cell_outputs)
        next_cell_state = nest.map_structure(self._maybe_split_batch_beams,
                                             next_cell_state, self._cell.state_size)

        beam_search_output, beam_search_state = _beam_search_step(
            time=time,
            logits=cell_outputs,
            next_cell_state=next_cell_state,
            beam_state=state,
            batch_size=batch_size,
            beam_width=beam_width,
            end_token=end_token,
            length_penalty_weight=length_penalty_weight)

        finished = beam_search_state.finished
        # replace the father ids
        sample_ids = beam_search_output.predicted_ids
        next_cell_state = beam_search_state.cell_state
        next_cell_state = next_cell_state._replace(last_choice=sample_ids)
        beam_search_state = beam_search_state._replace(cell_state=next_cell_state)
        # sample_ids shape = [batch_size, beam_width]
        next_inputs = control_flow_ops.cond(
            math_ops.reduce_all(finished), lambda: self._start_inputs,
            lambda: self._embedding_fn(sample_ids))

    return (beam_search_output, beam_search_state, next_inputs, finished)
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(
        FLAGS.dataset, FLAGS.eval_split, dataset_dir=FLAGS.dataset_dir)

    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    with tf.Graph().as_default():
        samples = input_generator.get(
            dataset,
            FLAGS.eval_crop_size,
            FLAGS.eval_batch_size,
            min_resize_value=FLAGS.min_resize_value,
            max_resize_value=FLAGS.max_resize_value,
            resize_factor=FLAGS.resize_factor,
            dataset_split=FLAGS.eval_split,
            is_training=False,
            model_variant=FLAGS.model_variant)

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.eval_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if tuple(FLAGS.eval_scales) == (1.0,):
            tf.logging.info('Performing single-scale test.')
            predictions = model.predict_labels(
                samples[common.IMAGE], model_options, image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)
        predictions = predictions[common.OUTPUT_TYPE]
        predictions = tf.reshape(predictions, shape=[-1])
        labels = tf.reshape(samples[common.LABEL], shape=[-1])
        weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

        # Set ignore_label regions to label 0, because metrics.mean_iou requires
        # range of labels = [0, dataset.num_classes). Note the ignore_label regions
        # are not evaluated since the corresponding regions contain weights = 0.
        labels = tf.where(
            tf.equal(labels, dataset.ignore_label), tf.zeros_like(labels), labels)

        predictions_tag = 'miou'
        for eval_scale in FLAGS.eval_scales:
            predictions_tag += '_' + str(eval_scale)
        if FLAGS.add_flipped_images:
            predictions_tag += '_flipped'

        # Define the evaluation metric.
        metric_map = {}
        metric_map[predictions_tag] = tf.metrics.mean_iou(
            predictions, labels, dataset.num_classes, weights=weights)

        metrics_to_values, metrics_to_updates = (
            tf.contrib.metrics.aggregate_metric_map(metric_map))

        for metric_name, metric_value in six.iteritems(metrics_to_values):
            slim.summaries.add_scalar_summary(
                metric_value, metric_name, print_summary=True)

        num_batches = int(
            math.ceil(dataset.num_samples / float(FLAGS.eval_batch_size)))

        tf.logging.info('Eval num images %d', dataset.num_samples)
        tf.logging.info('Eval batch size %d and num batch %d',
                        FLAGS.eval_batch_size, num_batches)

        num_eval_iters = None
        if FLAGS.max_number_of_evaluations > 0:
            num_eval_iters = FLAGS.max_number_of_evaluations
        slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.checkpoint_dir,
            logdir=FLAGS.eval_logdir,
            num_evals=num_batches,
            eval_op=list(metrics_to_updates.values()),
            max_number_of_evaluations=num_eval_iters,
            eval_interval_secs=FLAGS.eval_interval_secs)
def call(self, inputs):
    """Calculate target logits or inferred target sequences.

    Args:
      inputs: input tensor list of size 1 or 2.
        First item, inputs: int tensor with shape [batch_size, input_length].
        Second item (optional), targets: None or int tensor with shape
          [batch_size, target_length].

    Returns:
      If targets is defined, then return logits for each word in the target
      sequence: a float tensor with shape [batch_size, target_length, vocab_size].
      If targets is None, then generate the output sequence one token at a time
      and return a dictionary {
          outputs: [batch_size, decoded length]
          scores: [batch_size, float]}
      Even when float16 is used, the output tensor(s) are always float32.

    Raises:
      NotImplementedError: If trying to use the padded decode method on CPU/GPUs.
    """
    if len(inputs) == 2:
        inputs, targets = inputs[0], inputs[1]
    else:
        # Decoding path.
        inputs, targets = inputs[0], None
        # TODO(hongkuny): The check is not necessary. Fix this part.
        if self._padded_decode:
            if not self._num_replicas:
                raise NotImplementedError(
                    "Padded decoding on CPU/GPUs is not supported.")
            decode_batch_size = int(self._decode_batch_size / self._num_replicas)
            inputs.set_shape([decode_batch_size, self._decode_max_length])

    with tf.name_scope("Transformer"):
        attention_bias = model_utils.get_padding_bias(inputs)
        attention_bias = tf.cast(attention_bias, self._dtype)

        with tf.name_scope("encode"):
            # Prepare inputs to the layer stack by adding positional encodings and
            # applying dropout.
            embedded_inputs = self.embedding_lookup(inputs)
            embedding_mask = tf.cast(
                tf.not_equal(inputs, 0), self.embedding_lookup.embeddings.dtype)
            embedded_inputs *= tf.expand_dims(embedding_mask, -1)
            embedded_inputs = tf.cast(embedded_inputs, self._dtype)

            # Attention mask generation.
            input_shape = tf_utils.get_shape_list(inputs, expected_rank=2)
            attention_mask = tf.cast(
                tf.reshape(tf.not_equal(inputs, 0), [input_shape[0], 1, input_shape[1]]),
                dtype=inputs.dtype)
            broadcast_ones = tf.ones(
                shape=[input_shape[0], input_shape[1], 1], dtype=inputs.dtype)
            attention_mask = broadcast_ones * attention_mask

            with tf.name_scope("add_pos_encoding"):
                pos_encoding = self.position_embedding(inputs=embedded_inputs)
                pos_encoding = tf.cast(pos_encoding, self._dtype)
                encoder_inputs = embedded_inputs + pos_encoding

            encoder_inputs = self.encoder_dropout(encoder_inputs)

            encoder_outputs = self.encoder_layer(
                encoder_inputs, attention_mask=attention_mask)

        if targets is None:
            encoder_decoder_attention_bias = attention_bias
            encoder_outputs = tf.cast(encoder_outputs, self._dtype)
            if self._padded_decode:
                batch_size = encoder_outputs.shape.as_list()[0]
                input_length = encoder_outputs.shape.as_list()[1]
            else:
                batch_size = tf.shape(encoder_outputs)[0]
                input_length = tf.shape(encoder_outputs)[1]
            max_decode_length = input_length + self._extra_decode_length
            encoder_decoder_attention_bias = tf.cast(
                encoder_decoder_attention_bias, self._dtype)

            symbols_to_logits_fn = self._get_symbols_to_logits_fn(max_decode_length)

            # Create initial set of IDs that will be passed to symbols_to_logits_fn.
            initial_ids = tf.zeros([batch_size], dtype=tf.int32)

            # Create cache storing decoder attention values for each layer.
            # pylint: disable=g-complex-comprehension
            init_decode_length = (max_decode_length if self._padded_decode else 0)
            num_heads = self._num_heads
            dim_per_head = self._hidden_size // num_heads
            cache = {
                str(layer): {
                    "key":
                        tf.zeros(
                            [batch_size, init_decode_length, num_heads, dim_per_head],
                            dtype=self._dtype),
                    "value":
                        tf.zeros(
                            [batch_size, init_decode_length, num_heads, dim_per_head],
                            dtype=self._dtype)
                } for layer in range(self._num_layers)
            }
            # pylint: enable=g-complex-comprehension

            # Add encoder output and attention bias to the cache.
            cache["encoder_outputs"] = encoder_outputs
            cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias

            # Use beam search to find the top beam_size sequences and scores.
            decoded_ids, scores = beam_search.sequence_beam_search(
                symbols_to_logits_fn=symbols_to_logits_fn,
                initial_ids=initial_ids,
                initial_cache=cache,
                vocab_size=self._vocab_size,
                beam_size=self._beam_size,
                alpha=self._alpha,
                max_decode_length=max_decode_length,
                eos_id=EOS_ID,
                padded_decode=self._padded_decode,
                dtype=self._dtype)

            # Get the top sequence for each batch element.
            top_decoded_ids = decoded_ids[:, 0, 1:]
            top_scores = scores[:, 0]

            return {"outputs": top_decoded_ids, "scores": top_scores}

        else:
            with tf.name_scope("decode"):
                decoder_inputs = self.embedding_lookup(targets)
                embedding_mask = tf.cast(
                    tf.not_equal(targets, 0), self.embedding_lookup.embeddings.dtype)
                decoder_inputs *= tf.expand_dims(embedding_mask, -1)
                decoder_inputs = tf.cast(decoder_inputs, self._dtype)

                with tf.name_scope("shift_targets"):
                    # Shift targets to the right, and remove the last element.
                    decoder_inputs = tf.pad(decoder_inputs,
                                            [[0, 0], [1, 0], [0, 0]])[:, :-1, :]

                with tf.name_scope("add_pos_encoding"):
                    length = tf.shape(decoder_inputs)[1]
                    pos_encoding = self.position_embedding(decoder_inputs)
                    pos_encoding = tf.cast(pos_encoding, self._dtype)
                    decoder_inputs += pos_encoding

                decoder_inputs = self.decoder_dropout(decoder_inputs)

                decoder_shape = tf_utils.get_shape_list(decoder_inputs, expected_rank=3)
                batch_size = decoder_shape[0]
                decoder_length = decoder_shape[1]

                self_attention_mask = tf.linalg.band_part(
                    tf.ones([length, length], dtype=tf.float32), -1, 0)
                self_attention_mask = tf.reshape(self_attention_mask, [1, length, length])
                self_attention_mask = tf.tile(self_attention_mask, [batch_size, 1, 1])

                attention_mask = tf.cast(
                    tf.expand_dims(tf.not_equal(inputs, 0), axis=1), dtype=inputs.dtype)
                attention_mask = tf.tile(attention_mask, [1, decoder_length, 1])

                outputs = self.decoder_layer(
                    decoder_inputs,
                    encoder_outputs,
                    memory_mask=self_attention_mask,
                    target_mask=attention_mask)
                logits = embedding_linear(self.embedding_lookup.embeddings, outputs)
                logits = tf.cast(logits, tf.float32)

            return logits
def build_loss(self, ohem=False):
    # RPN
    # classification loss
    rpn_cls_score = tf.reshape(self.get_output('rpn_cls_score_reshape'), [-1, 2])  # shape (HxWxA, 2)
    rpn_label = tf.reshape(self.get_output('rpn-data')[0], [-1])  # shape (HxWxA)
    # ignore_label(-1)
    fg_keep = tf.equal(rpn_label, 1)
    rpn_keep = tf.where(tf.not_equal(rpn_label, -1))
    rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score, rpn_keep), [-1, 2])  # shape (N, 2)
    rpn_label = tf.reshape(tf.gather(rpn_label, rpn_keep), [-1])

    rpn_cross_entropy_n = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score,
                                                                         labels=rpn_label)
    rpn_cross_entropy = tf.reduce_mean(rpn_cross_entropy_n)

    # box loss
    rpn_bbox_pred = self.get_output('rpn_bbox_pred')  # shape (1, H, W, Ax4)
    rpn_bbox_targets = self.get_output('rpn-data')[1]
    rpn_bbox_inside_weights = self.get_output('rpn-data')[2]
    rpn_bbox_outside_weights = self.get_output('rpn-data')[3]
    rpn_bbox_pred = tf.reshape(tf.gather(tf.reshape(rpn_bbox_pred, [-1, 4]), rpn_keep), [-1, 4])  # shape (N, 4)
    rpn_bbox_targets = tf.reshape(tf.gather(tf.reshape(rpn_bbox_targets, [-1, 4]), rpn_keep), [-1, 4])
    rpn_bbox_inside_weights = tf.reshape(tf.gather(tf.reshape(rpn_bbox_inside_weights, [-1, 4]), rpn_keep), [-1, 4])
    rpn_bbox_outside_weights = tf.reshape(tf.gather(tf.reshape(rpn_bbox_outside_weights, [-1, 4]), rpn_keep), [-1, 4])

    rpn_loss_box_n = tf.reduce_sum(self.smooth_l1_dist(
        rpn_bbox_inside_weights * (rpn_bbox_pred - rpn_bbox_targets)), axis=[1])

    # rpn_loss_n = tf.reshape(rpn_cross_entropy_n + rpn_loss_box_n * 5, [-1])

    if ohem:
        # k = tf.minimum(tf.shape(rpn_cross_entropy_n)[0] / 2, 300)
        # # k = tf.shape(rpn_loss_n)[0] / 2
        # rpn_loss_n, top_k_indices = tf.nn.top_k(rpn_cross_entropy_n, k=k, sorted=False)
        # rpn_cross_entropy_n = tf.gather(rpn_cross_entropy_n, top_k_indices)
        # rpn_loss_box_n = tf.gather(rpn_loss_box_n, top_k_indices)

        # strategy: keep all the positive samples
        fg_ = tf.equal(rpn_label, 1)
        bg_ = tf.equal(rpn_label, 0)
        pos_inds = tf.where(fg_)
        neg_inds = tf.where(bg_)
        rpn_cross_entropy_n_pos = tf.reshape(tf.gather(rpn_cross_entropy_n, pos_inds), [-1])
        rpn_cross_entropy_n_neg = tf.reshape(tf.gather(rpn_cross_entropy_n, neg_inds), [-1])
        top_k = tf.cast(tf.minimum(tf.shape(rpn_cross_entropy_n_neg)[0], 300), tf.int32)
        rpn_cross_entropy_n_neg, _ = tf.nn.top_k(rpn_cross_entropy_n_neg, k=top_k)
        rpn_cross_entropy = tf.reduce_sum(rpn_cross_entropy_n_neg) / (tf.reduce_sum(tf.cast(bg_, tf.float32)) + 1.0) \
            + tf.reduce_sum(rpn_cross_entropy_n_pos) / (tf.reduce_sum(tf.cast(fg_, tf.float32)) + 1.0)

        rpn_loss_box_n = tf.reshape(tf.gather(rpn_loss_box_n, pos_inds), [-1])
        # rpn_cross_entropy_n = tf.concat(0, (rpn_cross_entropy_n_pos, rpn_cross_entropy_n_neg))

    # rpn_loss_box = 1 * tf.reduce_mean(rpn_loss_box_n)
    rpn_loss_box = tf.reduce_sum(rpn_loss_box_n) / (tf.reduce_sum(tf.cast(fg_keep, tf.float32)) + 1.0)

    # R-CNN
    # classification loss
    cls_score = self.get_output('cls_score')  # (R, C+1)
    label = tf.reshape(self.get_output('roi-data')[1], [-1])  # (R)
    cross_entropy_n = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score, labels=label)

    # bounding box regression L1 loss
    bbox_pred = self.get_output('bbox_pred')  # (R, (C+1)x4)
    bbox_targets = self.get_output('roi-data')[2]  # (R, (C+1)x4)
    # each element is {0, 1}, represents background (0), objects (1)
    bbox_inside_weights = self.get_output('roi-data')[3]  # (R, (C+1)x4)
    bbox_outside_weights = self.get_output('roi-data')[4]  # (R, (C+1)x4)

    loss_box_n = tf.reduce_sum(
        bbox_outside_weights * self.smooth_l1_dist(bbox_inside_weights * (bbox_pred - bbox_targets)),
        axis=[1])

    loss_n = loss_box_n + cross_entropy_n
    loss_n = tf.reshape(loss_n, [-1])

    # if ohem:
    #     # top_k = 100
    #     top_k = tf.minimum(tf.shape(loss_n)[0] / 2, 500)
    #     loss_n, top_k_indices = tf.nn.top_k(loss_n, k=top_k, sorted=False)
    #     loss_box_n = tf.gather(loss_box_n, top_k_indices)
    #     cross_entropy_n = tf.gather(cross_entropy_n, top_k_indices)

    loss_box = tf.reduce_mean(loss_box_n)
    cross_entropy = tf.reduce_mean(cross_entropy_n)

    loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box

    # add regularizer
    if cfg.TRAIN.WEIGHT_DECAY > 0:
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        loss = tf.add_n(regularization_losses) + loss

    return loss, cross_entropy, loss_box, rpn_cross_entropy, rpn_loss_box
label = features['label']
index = features['index']
value = features['value']

dense_feature = tf.sparse_to_dense(
    tf.sparse_tensor_to_dense(index),
    [num_features, ],  # tf.constant([33762578, 1], dtype=tf.int64),
    tf.sparse_tensor_to_dense(value))
dense_feature = tf.reshape(dense_feature, [num_features, 1])

dotProduct = tf.matmul(tf.transpose(w), dense_feature)
y = tf.cast(label, tf.float32)
error = tf.not_equal(tf.sign(dotProduct), y)
################### TEST ENDS #############################################

with tf.Session("grpc://vm-32-%d:2222" % (FLAGS.task_index + 1)) as sess:
    # only one client initializes the variable
    if FLAGS.task_index == 0:
        sess.run(tf.initialize_all_variables())

    # start queue runners
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    num_examples = 1000
    for i in xrange(0, 10000):
        sess.run(w)
        if ((i % 1000) == 0):
def error(self):
    mistakes = tf.not_equal(tf.argmax(self.target, 1), tf.argmax(self.prediction, 1))
    return tf.reduce_mean(tf.cast(mistakes, tf.float32))
def add_softmax_cross_entropy_loss_for_each_scale(scales_to_logits,
                                                  labels,
                                                  num_classes,
                                                  dataset,
                                                  loss_weight=1.0,
                                                  upsample_logits=True,
                                                  scope=None,
                                                  enable_class_balancing=False):
    """Adds softmax cross entropy loss for logits of each scale.

    Args:
      scales_to_logits: A map from logits names for different scales to logits.
        The logits have shape [batch, logits_height, logits_width, num_classes].
      labels: Groundtruth labels with shape [batch, image_height, image_width, 1].
      num_classes: Integer, number of target classes.
      dataset: The dataset object; provides `ignore_label` and, when class
        balancing is enabled, per-class frequency information.
      loss_weight: Float, loss weight.
      upsample_logits: Boolean, upsample logits or not.
      scope: String, the scope for the loss.
      enable_class_balancing: Boolean, whether to weight classes by their
        inverse frequency.

    Raises:
      ValueError: Label or logits is None.
    """
    if labels is None:
        raise ValueError('No label for softmax cross entropy loss.')

    for scale, logits in six.iteritems(scales_to_logits):
        loss_scope = None
        if scope:
            loss_scope = '%s_%s' % (scope, scale)

        if upsample_logits:
            # Label is not downsampled, and instead we upsample logits.
            logits = tf.image.resize_bilinear(logits, tf.shape(labels)[1:3],
                                              align_corners=True)
            scaled_labels = labels
        else:
            # Label is downsampled to the same size as logits.
            scaled_labels = tf.image.resize_nearest_neighbor(
                labels, tf.shape(logits)[1:3], align_corners=True)

        scaled_labels = tf.reshape(scaled_labels, shape=[-1])
        one_hot_labels = slim.one_hot_encoding(scaled_labels, num_classes,
                                               on_value=1.0, off_value=0.0)
        if enable_class_balancing:
            tf.logging.info('Using class balancing for loss function.')
            if dataset.cls_to_percentage is None:
                raise ValueError(
                    'Class balancing for {} currently not supported'.format(dataset.name))
            class_weights = dataset.get_class_weights(dataset.labels_to_class,
                                                      dataset.cls_to_percentage)
            class_weights = tf.constant(class_weights)
            weights = tf.reduce_sum(tf.multiply(one_hot_labels, class_weights), 1)
        else:
            weights = tf.to_float(tf.not_equal(scaled_labels, dataset.ignore_label)) * loss_weight

        tf.losses.softmax_cross_entropy(one_hot_labels,
                                        tf.reshape(logits, shape=[-1, num_classes]),
                                        weights=weights,
                                        scope=loss_scope)
def compute_mask(self, inputs, mask=None):
    if not self.mask_zero:
        return None
    return tf.not_equal(inputs, 0)
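# A quick illustration of the Keras-style mask semantics (assuming the layer
# was built with mask_zero=True): timesteps whose input id is 0 are masked
# out for downstream layers.
import tensorflow as tf

mask_inputs = tf.constant([[4, 1, 0]])
demo_keras_mask = tf.not_equal(mask_inputs, 0)  # [[True, True, False]], what compute_mask returns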
def decode_sparse(self, include_stop_tokens=True):
    dense_symbols, logprobs = self.decode_dense()
    mask = tf.not_equal(dense_symbols, self.stop_token)
    if include_stop_tokens:
        # Shift the mask right by one step so each stop token itself is kept.
        mask = tf.concat([tf.ones_like(mask[:, :1]), mask[:, :-1]], axis=1)
    return sparse_boolean_mask(dense_symbols, mask), logprobs
def error(self):
    mistakes = tf.not_equal(
        tf.argmax(self.target, 1), tf.argmax(self.prediction, 1))
    error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
    tf.summary.scalar("error", error)
    return error
def _parse_train_data(self, data):
    """Parse data for ShapeMask training."""
    classes = data['groundtruth_classes']
    boxes = data['groundtruth_boxes']
    masks = data['groundtruth_instance_masks']
    is_crowds = data['groundtruth_is_crowd']
    # Skips annotations with `is_crowd` = True.
    if self._skip_crowd_during_training and self._is_training:
        num_groundtrtuhs = tf.shape(classes)[0]
        with tf.control_dependencies([num_groundtrtuhs, is_crowds]):
            indices = tf.cond(
                tf.greater(tf.size(is_crowds), 0),
                lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                lambda: tf.cast(tf.range(num_groundtrtuhs), tf.int64))
        classes = tf.gather(classes, indices)
        boxes = tf.gather(boxes, indices)
        masks = tf.gather(masks, indices)

    # Gets original image and its size.
    image = data['image']
    image_shape = tf.shape(image)[0:2]

    # If not using category, makes all categories with id = 0.
    if not self._use_category:
        classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

    # Normalizes image with mean and std pixel values.
    image = input_utils.normalize_image(image)

    # Flips image randomly during training.
    if self._aug_rand_hflip:
        image, boxes, masks = input_utils.random_horizontal_flip(
            image, boxes, masks)

    # Converts boxes from normalized coordinates to pixel coordinates.
    boxes = box_utils.denormalize_boxes(boxes, image_shape)

    # Resizes and crops image.
    image, image_info = input_utils.resize_and_crop_image(
        image,
        self._output_size,
        self._output_size,
        aug_scale_min=self._aug_scale_min,
        aug_scale_max=self._aug_scale_max)
    image_scale = image_info[2, :]
    offset = image_info[3, :]

    # Resizes and crops boxes and masks.
    boxes = input_utils.resize_and_crop_boxes(
        boxes, image_scale, self._output_size, offset)
    masks = input_utils.resize_and_crop_masks(
        tf.expand_dims(masks, axis=-1), image_scale, self._output_size, offset)
    masks = tf.squeeze(masks, axis=-1)

    # Filters out ground truth boxes that are all zeros.
    indices = input_utils.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
    classes = tf.gather(classes, indices)
    masks = tf.gather(masks, indices)

    # Assigns anchors.
    input_anchor = anchor.Anchor(
        self._min_level, self._max_level, self._num_scales,
        self._aspect_ratios, self._anchor_size, self._output_size)
    anchor_labeler = anchor.AnchorLabeler(
        input_anchor, self._match_threshold, self._unmatched_threshold)
    (cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors(
        boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32))

    # Sample groundtruth masks/boxes/classes for mask branch.
    num_masks = tf.shape(masks)[0]
    mask_shape = tf.shape(masks)[1:3]

    # Pad sampled boxes/masks/classes to a constant batch size.
    padded_boxes = input_utils.pad_to_fixed_size(boxes, self._num_sampled_masks)
    padded_classes = input_utils.pad_to_fixed_size(
        classes, self._num_sampled_masks)
    padded_masks = input_utils.pad_to_fixed_size(masks, self._num_sampled_masks)

    # Randomly sample groundtruth masks for mask branch training. For the image
    # without groundtruth masks, it will sample the dummy padded tensors.
    rand_indices = tf.random.uniform(
        [self._num_sampled_masks],
        minval=0,
        maxval=tf.maximum(num_masks, 1),
        dtype=tf.dtypes.int32)
    sampled_boxes = tf.gather(padded_boxes, rand_indices)
    sampled_classes = tf.gather(padded_classes, rand_indices)
    sampled_masks = tf.gather(padded_masks, rand_indices)

    # Jitter the sampled boxes to mimic the noisy detections.
    sampled_boxes = box_utils.jitter_boxes(
        sampled_boxes, noise_scale=self._box_jitter_scale)

    # Compute mask targets in feature crop. A feature crop fully contains a
    # sampled box.
    mask_outer_boxes = box_utils.compute_outer_boxes(
        sampled_boxes, mask_shape, scale=self._outer_box_scale)
    norm_mask_outer_boxes = box_utils.normalize_boxes(
        mask_outer_boxes, mask_shape)

    # Set sampled_masks shape to [batch_size, height, width, 1].
    sampled_masks = tf.expand_dims(sampled_masks, axis=-1)
    mask_targets = tf.image.crop_and_resize(
        sampled_masks,
        norm_mask_outer_boxes,
        box_ind=tf.range(self._num_sampled_masks),
        crop_size=[self._mask_crop_size, self._mask_crop_size],
        method='bilinear',
        extrapolation_value=0,
        name='train_mask_targets')
    mask_targets = tf.where(
        tf.greater_equal(mask_targets, 0.5),
        tf.ones_like(mask_targets),
        tf.zeros_like(mask_targets))
    mask_targets = tf.squeeze(mask_targets, axis=-1)

    # If bfloat16 is used, casts input image to tf.bfloat16.
    if self._use_bfloat16:
        image = tf.cast(image, dtype=tf.bfloat16)

    # Packs labels for model_fn outputs.
    labels = {
        'cls_targets': cls_targets,
        'box_targets': box_targets,
        'anchor_boxes': input_anchor.multilevel_boxes,
        'num_positives': num_positives,
        'image_info': image_info,
        # For ShapeMask.
        'mask_boxes': sampled_boxes,
        'mask_outer_boxes': mask_outer_boxes,
        'mask_targets': mask_targets,
        'mask_classes': sampled_classes,
        'mask_is_valid': tf.cast(tf.not_equal(num_masks, 0), tf.int32)
    }
    return image, labels
def compute_loss(self, y_true1, y_pred): ''' Compute the loss of the SSD model prediction against the ground truth. Arguments: y_true1 (array): A Numpy array of shape `(batch_size, #boxes, #classes + 12 (NEW Here)+5(gt label,xmin,ymin,xmax,ymax))`, where `#boxes` is the total number of boxes that the model predicts per image. Be careful to make sure that the index of each given box in `y_true` is the same as the index for the corresponding box in `y_pred`. The last axis must have length `#classes + 12` and contain `[classes one-hot encoded, 4 ground truth box coordinate offsets, 8 arbitrary entries]` in this order, including the background class. The last eight entries of the last axis are not used by this function and therefore their contents are irrelevant, they only exist so that `y_true` has the same shape as `y_pred`, where the last four entries of the last axis contain the anchor box coordinates, which are needed during inference. Important: Boxes that you want the cost function to ignore need to have a one-hot class vector of all zeros. y_pred (Keras tensor): The model prediction. The shape is identical to that of `y_true`, i.e. `(batch_size, #boxes, #classes + 12)`. The last axis must contain entries in the format `[classes one-hot encoded, 4 predicted box coordinate offsets, 8 arbitrary entries]`. Returns: A scalar, the total multitask loss for classification and localization. ''' # arm total loss y_true = y_true1[:, :, :-5] self.neg_pos_ratio = tf.constant(self.neg_pos_ratio) self.n_neg_min = tf.constant(self.n_neg_min) self.alpha = tf.constant(self.alpha) batch_size = tf.shape(y_pred)[0] # Output dtype: tf.int32 n_boxes = tf.shape( y_pred )[1] # Output dtype: tf.int32, note that `n_boxes` in this context denotes the total number of boxes per image, not the number of boxes per cell. positives_arm = tf.reduce_sum(y_true[:, :, 1:self.n_class], axis=2, keepdims=True) y_true_arm = y_true[:, :, 0:1] y_true_arm = tf.concat([y_true_arm, positives_arm], axis=-1) # 1: Compute the losses for class and box predictions for every box. classification_loss = tf.to_float( self.log_loss(y_true_arm[:, :, :], y_pred[:, :, 0:2])) # Output shape: (batch_size, n_boxes) localization_loss = tf.to_float( self.smooth_L1_loss( y_true[:, :, -12:-8], y_pred[:, :, 2:6])) # Output shape: (batch_size, n_boxes) # 2: Compute the classification losses for the positive and negative targets. # Create masks for the positive and negative ground truth classes. negatives = y_true_arm[:, :, 0] # Tensor of shape (batch_size, n_boxes) positives = y_true_arm[:, :, 1] # Tensor of shape (batch_size, n_boxes) # Count the number of positive boxes (classes 1 to n) in y_true across the whole batch. n_positive = tf.reduce_sum(positives) # n_positive = tf.Print(n_positive, [n_positive], # message='Debug message arm_n_positive:', # first_n=10000, summarize=100000) # Now mask all negative boxes and sum up the losses for the positive boxes PER batch item # (Keras loss functions must output one scalar loss value PER batch item, rather than just # one scalar for the entire batch, that's why we're not summing across all axes). pos_class_loss = tf.reduce_sum( classification_loss * positives, axis=-1) # Tensor of shape (batch_size,) # Compute the classification loss for the negative default boxes (if there are any). # First, compute the classification loss for all negative boxes. 
neg_class_loss_all = classification_loss * negatives # Tensor of shape (batch_size, n_boxes) n_neg_losses = tf.count_nonzero(neg_class_loss_all, dtype=tf.int32) # The number of non-zero loss entries in `neg_class_loss_all` # What's the point of `n_neg_losses`? For the next step, which will be to compute which negative boxes enter the classification # loss, we don't just want to know how many negative ground truth boxes there are, but for how many of those there actually is # a positive (i.e. non-zero) loss. This is necessary because `tf.nn.top_k()` in the function below will pick the top k boxes with # the highest losses no matter what, even if it receives a vector where all losses are zero. In the unlikely event that all negative # classification losses ARE actually zero though, this behavior might lead to `tf.nn.top_k()` returning the indices of positive # boxes, leading to an incorrect negative classification loss computation, and hence an incorrect overall loss computation. # We therefore need to make sure that `n_negative_keep`, which assumes the role of the `k` argument in `tf.nn.top_k()`, # is at most the number of negative boxes for which there is a positive classification loss. # Compute the number of negative examples we want to account for in the loss. # We'll keep at most `self.neg_pos_ratio` times the number of positives in `y_true`, but at least `self.n_neg_min` (unless `n_neg_losses` is smaller). # These are the indices of the top-k highest-loss negative samples, gathered later; k is self.neg_pos_ratio times the number of positives. n_negative_keep = tf.minimum(tf.maximum(self.neg_pos_ratio * tf.to_int32(n_positive), self.n_neg_min), n_neg_losses) # In the unlikely case when either (1) there are no negative ground truth boxes at all # or (2) the classification loss for all negative boxes is zero, return zero as the `neg_class_loss`. def f1(): return tf.zeros([batch_size]) # Otherwise compute the negative loss. def f2(): # Now we'll identify the top-k (where k == `n_negative_keep`) boxes with the highest confidence loss that # belong to the background class in the ground truth data. Note that this doesn't necessarily mean that the model # predicted the wrong class for those boxes, it just means that the loss for those boxes is the highest. # To do this, we reshape `neg_class_loss_all` to 1D... neg_class_loss_all_1D = tf.reshape(neg_class_loss_all, [-1]) # Tensor of shape (batch_size * n_boxes,) # ...and then we get the indices for the `n_negative_keep` boxes with the highest loss out of those... values, indices = tf.nn.top_k(neg_class_loss_all_1D, k=n_negative_keep, sorted=False) # We don't need them sorted. # ...and with these indices we'll create a mask...
negatives_keep = tf.scatter_nd(indices=tf.expand_dims(indices, axis=1), updates=tf.ones_like(indices, dtype=tf.int32), shape=tf.shape(neg_class_loss_all_1D)) # Tensor of shape (batch_size * n_boxes,) negatives_keep = tf.to_float(tf.reshape(negatives_keep, [batch_size, n_boxes])) # Tensor of shape (batch_size, n_boxes) # ...and use it to keep only those boxes and mask all other classification losses neg_class_loss = tf.reduce_sum(classification_loss * negatives_keep, axis=-1) # Tensor of shape (batch_size,) return neg_class_loss neg_class_loss = tf.cond(tf.equal(n_neg_losses, tf.constant(0)), f1, f2) class_loss_arm = pos_class_loss + neg_class_loss # Tensor of shape (batch_size,) # NEW: only the positive samples contribute to the ARM localization loss. # 3: Compute the localization loss for the positive targets. # We don't compute a localization loss for negative predicted boxes (obviously: there are no ground truth boxes they would correspond to). loc_loss_arm = tf.reduce_sum(localization_loss * positives, axis=-1) # Tensor of shape (batch_size,) total_loss_arm = (class_loss_arm + self.alpha * loc_loss_arm) / tf.maximum(1.0, n_positive) ######################## ODM loss ########################### # Decode the loc offsets predicted by the ARM; the decoded boxes serve as the refined anchors (new GT-box coordinates) for the ODM matching below. y_pred_decoded_raw = y_pred[:, :, 2:14] # (0-12): gt (cx, cy, w, h), prior (cx, cy, w, h), variance (0.1, 0.1, 0.2, 0.2); decoded below into xmin, ymin, xmax, ymax. hpref = tf.exp(y_pred_decoded_raw[:, :, 3:4] * y_pred_decoded_raw[:, :, -1:]) wpref = tf.exp(y_pred_decoded_raw[:, :, 2:3] * y_pred_decoded_raw[:, :, -2:-1]) # exp(ln(w(pred)/w(anchor)) / w_variance * w_variance) == w(pred) / w(anchor), exp(ln(h(pred)/h(anchor)) / h_variance * h_variance) == h(pred) / h(anchor) hpref = hpref * y_pred_decoded_raw[:, :, -5:-4] wpref = wpref * y_pred_decoded_raw[:, :, -6:-5] cypref = y_pred_decoded_raw[:, :, 1:2] * y_pred_decoded_raw[:, :, -3:-2] * y_pred_decoded_raw[:, :, -5:-4] cxpref = y_pred_decoded_raw[:, :, 0:1] * y_pred_decoded_raw[:, :, -4:-3] * y_pred_decoded_raw[:, :, -6:-5] cypref = cypref + y_pred_decoded_raw[:, :, -7:-6] cxpref = cxpref + y_pred_decoded_raw[:, :, -8:-7] xmin_a = (cxpref - wpref / 2.0) * 320 # Set xmin ymin_a = (cypref - hpref / 2.0) * 320 # Set ymin xmax_a = (cxpref + wpref / 2.0) * 320 # Set xmax ymax_a = (cypref + hpref / 2.0) * 320 # Set ymax vol_anchors = (xmax_a - xmin_a) * (ymax_a - ymin_a) # Re-match every GT box against the ARM-decoded coordinates, i.e. encode the GT boxes relative to the refined anchors; the result becomes the refined ground truth from which the ODM part computes its loss. # gt_bboxes holds all GT boxes and classes in the batch: (batch_size, anchor_id, (class, xmin, ymin, xmax, ymax)). gt_bboxes = y_true1[:, :, self.n_class + 12:] gt_labels = y_true1[:, :, self.n_class + 12:self.n_class + 13] gt_num_max = self.gt_num_max # Upper bound on the number of GT boxes per image; loop bound for the matching loops below. # Initialize the matching buffers. feat_labels = tf.cast(tf.zeros_like(y_true1[:, :, 0:1]), tf.int32) # Stores the class label of the GT box matched to each refine anchor. feat_scores = tf.zeros_like(y_true1[:, :, 0:1]) # Stores the IoU between each refine anchor and its matched GT box. feat_matched = tf.cast(tf.zeros_like(y_true1[:, :, 0:1]), tf.int32) # Marks samples for the later filtering steps; used as the basis for the mask operations. feat_gtnum = tf.cast(tf.zeros_like(y_true1[:, :, 0:1]), tf.int32) feat_ymin = tf.zeros_like(y_true1[:, :, 0:1]) # Stores the coordinates of the GT box matched to each refine anchor.
feat_xmin = tf.zeros_like(y_true1[:, :, 0:1]) feat_ymax = tf.zeros_like(y_true1[:, :, 0:1]) feat_xmax = tf.zeros_like(y_true1[:, :, 0:1]) def jaccard_with_anchors(label, bbox): # Compute the IoU (Jaccard overlap) between a GT box and all decoded anchors. int_xmin = tf.maximum(label[:, :, 0:1], bbox[:, :, 0:1]) int_ymin = tf.maximum(label[:, :, 1:2], bbox[:, :, 1:2]) int_xmax = tf.minimum(label[:, :, 2:3], bbox[:, :, 2:3]) int_ymax = tf.minimum(label[:, :, 3:4], bbox[:, :, 3:4]) h = tf.maximum(int_ymax - int_ymin, 0.) w = tf.maximum(int_xmax - int_xmin, 0.) # Volumes. inter_vol = h * w union_vol = vol_anchors - inter_vol + (label[:, :, 2:3] - label[:, :, 0:1]) * (label[:, :, 3:4] - label[:, :, 1:2]) # IoU scores jaccard = tf.div(inter_vol, union_vol) return jaccard def condition(i, feat_labels, feat_scores, feat_gtnum, # Loop condition feat_xmin, feat_ymin, feat_xmax, feat_ymax): # Loop over all GT boxes within each image. r = tf.less(tf.cast(i, dtype=tf.float32), gt_num_max) # Returns True while i < gt_num_max (the number of GT boxes). return r def body(i, feat_labels, feat_scores, feat_gtnum, # Loop body feat_xmin, feat_ymin, feat_xmax, feat_ymax): """ Compute the IoU of GT box i against all anchors; wherever the new IoU exceeds the stored one, save the new IoU together with the corresponding label, GT index and coordinate values. """ # Jaccard score. label = tf.concat([ gt_bboxes[:, i:i + 1, 1:2], gt_bboxes[:, i:i + 1, 2:3], gt_bboxes[:, i:i + 1, 3:4], gt_bboxes[:, i:i + 1, 4:5] ], axis=-1) bbox = tf.concat([ xmin_a[:, :, 0:1], ymin_a[:, :, 0:1], xmax_a[:, :, 0:1], ymax_a[:, :, 0:1] ], axis=-1) jaccard = jaccard_with_anchors(label, bbox) # IoU between each image's GT box and every box decoded from the ARM predictions. # Mask: check threshold + scores + no annotations + num_classes.
mask = tf.greater(jaccard, feat_scores) # Is the IoU larger than that of the previously matched GT box? mask1 = tf.equal(y_true_arm[:, :, 0:1], 1) mask1 = tf.logical_and(mask1, tf.greater_equal(y_pred[:, :, 0:1], 0.99)) mask1 = tf.logical_not(mask1) mask = tf.logical_and(mask, mask1) imask = tf.cast(mask, tf.int32) # Cast to int fmask = tf.cast(mask, tf.float32) # dtype float32 feat_labels = imask * tf.cast(gt_labels[:, i:i + 1, 0:1], tf.int32) + (1 - imask) * feat_labels # Where imask is 1 take the new label; (1 - imask) keeps the previous value everywhere else. feat_gtnum = imask * tf.cast(i, tf.int32) + (1 - imask) * feat_gtnum # Likewise record the index of the GT box wherever the IoU improved. feat_scores = tf.where(mask, jaccard, feat_scores) feat_xmin = fmask * label[:, :, 0:1] + (1 - fmask) * feat_xmin # Update the coordinate information where fmask is 1.0. feat_ymin = fmask * label[:, :, 1:2] + (1 - fmask) * feat_ymin feat_xmax = fmask * label[:, :, 2:3] + (1 - fmask) * feat_xmax feat_ymax = fmask * label[:, :, 3:4] + (1 - fmask) * feat_ymax return [ i + 1, feat_labels, feat_scores, feat_gtnum, feat_xmin, feat_ymin, feat_xmax, feat_ymax ] i = 0 [ i, feat_labels, feat_scores, feat_gtnum, feat_xmin, feat_ymin, feat_xmax, feat_ymax ] = tf.while_loop( condition, body, # tf.while_loop repeatedly runs `body` while `condition` holds; the third argument is the list of loop variables. [ i, feat_labels, feat_scores, feat_gtnum, feat_xmin, feat_ymin, feat_xmax, feat_ymax ]) def condition2(i, feat_labels, feat_scores, feat_matched, feat_gtnum, feat_xmin, feat_ymin, feat_xmax, feat_ymax): r = tf.less(tf.cast(i, dtype=tf.float32), gt_num_max) # Returns True while i < gt_num_max (the number of GT boxes). return r def body2(i, feat_labels, feat_scores, feat_matched, feat_gtnum, # Loop body feat_xmin, feat_ymin, feat_xmax, feat_ymax): """Similar to the previous loop, except that it matches each GT box to its single highest-IoU anchor and marks that anchor, which makes it easy for the second step to match a GT box to every anchor. """ # Find the highest-IoU anchor for every GT box and mark it with 1 so it survives the later threshold filtering. mask = tf.equal(feat_gtnum, i) # Extract from feat_scores the IoUs belonging to GT box i. tmp = tf.where(mask, feat_scores, tf.zeros_like(feat_scores)) # Find the anchor with the maximum IoU for this GT box. max_score = tf.reduce_max(tf.reshape(tmp, shape=[1, -1])) # Turn its position into a boolean template. mask = tf.equal(tmp, max_score) mask = tf.logical_and(mask, tf.greater(tmp, 0)) mask = tf.logical_and(mask, tf.not_equal(feat_matched, 1)) # Mark that position with 1 in feat_matched. imask = tf.cast(mask, tf.int32) # Cast to int feat_matched = imask * tf.cast(1, tf.int32) + feat_matched return [ i + 1, feat_labels, feat_scores, feat_matched, feat_gtnum, feat_xmin, feat_ymin, feat_xmax, feat_ymax ] i = 0 [ i, feat_labels, feat_scores, feat_matched, feat_gtnum, feat_xmin, feat_ymin, feat_xmax, feat_ymax ] = tf.while_loop( condition2, body2, # Same pattern: run body2 while condition2 holds over the listed loop variables. [ i, feat_labels, feat_scores, feat_matched, feat_gtnum, feat_xmin, feat_ymin, feat_xmax, feat_ymax ]) mask = tf.equal(feat_matched, 1) mask = tf.logical_or(mask, tf.greater_equal(feat_scores, 0.5))
feat_labels = tf.where(mask, feat_labels, tf.zeros_like(feat_labels)) feat_xmin = tf.where(mask, feat_xmin, tf.zeros_like(feat_xmin)) feat_ymin = tf.where(mask, feat_ymin, tf.zeros_like(feat_ymin)) feat_xmax = tf.where(mask, feat_xmax, tf.zeros_like(feat_xmax)) feat_ymax = tf.where(mask, feat_ymax, tf.zeros_like(feat_ymax)) feat_matched = tf.where(mask, 2 * tf.ones_like(feat_matched), feat_matched) # Transform to center / size, i.e. convert back to center coordinates plus width and height. feat_cy = (feat_ymax + feat_ymin) / 2. / 320 feat_cx = (feat_xmax + feat_xmin) / 2. / 320 feat_h = (feat_ymax - feat_ymin) / 320. feat_w = (feat_xmax - feat_xmin) / 320. prior_scaling = [0.1, 0.1, 0.2, 0.2] feat_cx = (feat_cx - cxpref) / (wpref * prior_scaling[0]) feat_cy = (feat_cy - cypref) / (hpref * prior_scaling[1]) # Offset between the refine-anchor center and the matched GT box center. feat_w = tf.log(tf.maximum((feat_w) / (wpref), 1e-15)) / prior_scaling[2] feat_h = tf.log(tf.maximum((feat_h) / (hpref), 1e-15)) / prior_scaling[3] # Log-space offsets for height and width. feat_labels1 = tf.cast(tf.one_hot(feat_labels, self.n_class, axis=-1), dtype=tf.int32) feat_labels_reshape = tf.reshape( feat_labels1, shape=[tf.shape(feat_labels1)[0], tf.shape(feat_labels1)[1], -1]) # Build the new y_true used to compute the loss of the ODM part. y_refine = tf.concat([ tf.cast(feat_labels_reshape, dtype=tf.float32), feat_cx, feat_cy, feat_w, feat_h, cxpref, cypref, wpref, hpref, y_pred[:, :, 10:14] ], axis=-1) # ODM total loss classification_loss = tf.to_float( self.log_loss(y_refine[:, :, :-12], y_pred[:, :, 14:-12])) # Output shape: (batch_size, n_boxes) localization_loss = tf.to_float( self.smooth_L1_loss(y_refine[:, :, self.n_class:self.n_class + 4], y_pred[:, :, -12:-8])) # Output shape: (batch_size, n_boxes) # 2: Compute the classification losses for the positive and negative targets. # Create masks for the positive and negative ground truth classes. negatives = y_refine[:, :, 0] # Tensor of shape (batch_size, n_boxes) positives = tf.to_float( tf.reduce_max(y_refine[:, :, 1:self.n_class], axis=-1)) # Tensor of shape (batch_size, n_boxes) mask = tf.equal(y_true_arm[:, :, 0], 1) mask = tf.logical_and(mask, tf.greater_equal(y_pred[:, :, 0], 0.99)) # Filter negatives whose IoU exceeds 0.3; these samples are not counted in the loss. mask1 = tf.not_equal(feat_matched[:, :, 0], 2) mask1 = tf.logical_and(mask1, tf.greater_equal(feat_scores[:, :, 0], 0.3)) mask = tf.logical_or(mask, mask1) # Filter out the positives and negatives that satisfy the mask, so they contribute no loss and no gradient update. positives = tf.where(mask, tf.zeros_like(positives), positives) negatives = tf.where(mask, tf.zeros_like(negatives), negatives) n_positive = tf.reduce_sum(positives) # Now mask all negative boxes and sum up the losses for the positive boxes PER batch item # (Keras loss functions must output one scalar loss value PER batch item, rather than just # one scalar for the entire batch, that's why we're not summing across all axes). pos_class_loss = tf.reduce_sum(classification_loss * positives, axis=-1) # Tensor of shape (batch_size,) # Compute the classification loss for the negative default boxes (if there are any). # First, compute the classification loss for all negative boxes.
neg_class_loss_all = classification_loss * negatives # Tensor of shape (batch_size, n_boxes) n_neg_losses = tf.count_nonzero(neg_class_loss_all, dtype=tf.int32) # The number of non-zero loss entries in `neg_class_loss_all` # What's the point of `n_neg_losses`? For the next step, which will be to compute which negative boxes enter the classification # loss, we don't just want to know how many negative ground truth boxes there are, but for how many of those there actually is # a positive (i.e. non-zero) loss. This is necessary because `tf.nn.top_k()` in the function below will pick the top k boxes with # the highest losses no matter what, even if it receives a vector where all losses are zero. In the unlikely event that all negative # classification losses ARE actually zero though, this behavior might lead to `tf.nn.top_k()` returning the indices of positive # boxes, leading to an incorrect negative classification loss computation, and hence an incorrect overall loss computation. # We therefore need to make sure that `n_negative_keep`, which assumes the role of the `k` argument in `tf.nn.top_k()`, # is at most the number of negative boxes for which there is a positive classification loss. # Compute the number of negative examples we want to account for in the loss. # We'll keep at most `self.neg_pos_ratio` times the number of positives in `y_true`, but at least `self.n_neg_min` (unless `n_neg_losses` is smaller). n_negative_keep = tf.minimum(tf.maximum(self.neg_pos_ratio * tf.to_int32(n_positive), self.n_neg_min), n_neg_losses) # In the unlikely case when either (1) there are no negative ground truth boxes at all # or (2) the classification loss for all negative boxes is zero, return zero as the `neg_class_loss`. def f1(): return tf.zeros([batch_size]) # Otherwise compute the negative loss. def f2(): # Now we'll identify the top-k (where k == `n_negative_keep`) boxes with the highest confidence loss that # belong to the background class in the ground truth data. Note that this doesn't necessarily mean that the model # predicted the wrong class for those boxes, it just means that the loss for those boxes is the highest. # To do this, we reshape `neg_class_loss_all` to 1D... neg_class_loss_all_1D = tf.reshape(neg_class_loss_all, [-1]) # Tensor of shape (batch_size * n_boxes,) # ...and then we get the indices for the `n_negative_keep` boxes with the highest loss out of those... values, indices = tf.nn.top_k(neg_class_loss_all_1D, k=n_negative_keep, sorted=False) # We don't need them sorted. # ...and with these indices we'll create a mask... negatives_keep = tf.scatter_nd(indices=tf.expand_dims(indices, axis=1), updates=tf.ones_like(indices, dtype=tf.int32), shape=tf.shape(neg_class_loss_all_1D)) # Tensor of shape (batch_size * n_boxes,) negatives_keep = tf.to_float(tf.reshape(negatives_keep, [batch_size, n_boxes])) # Tensor of shape (batch_size, n_boxes) # ...and use it to keep only those boxes and mask all other classification losses neg_class_loss = tf.reduce_sum(classification_loss * negatives_keep, axis=-1) # Tensor of shape (batch_size,) return neg_class_loss neg_class_loss = tf.cond(tf.equal(n_neg_losses, tf.constant(0)), f1, f2) class_loss_odm = pos_class_loss + neg_class_loss # Tensor of shape (batch_size,) # 3: Compute the localization loss for the positive targets. # We don't compute a localization loss for negative predicted boxes (obviously: there are no ground truth boxes they would correspond to).
loc_loss_odm = tf.reduce_sum(localization_loss * positives, axis=-1) # Tensor of shape (batch_size,) # 4: Compute the total loss. total_loss_odm = (class_loss_odm + self.alpha * loc_loss_odm) / tf.maximum(1.0, n_positive) # In case `n_positive == 0` # Keras has the annoying habit of dividing the loss by the batch size, which sucks in our case # because the relevant criterion to average our loss over is the number of positive boxes in the batch # (by which we're dividing in the line above), not the batch size. So in order to revert Keras' averaging # over the batch size, we'll have to multiply by it. # Add the ARM and ODM losses. total_loss = (total_loss_odm + total_loss_arm) * tf.to_float(batch_size) return total_loss
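# A minimal sketch of the hard-negative-mining pattern used twice above (once for the
# ARM, once for the ODM), written TF2-style with made-up shapes: keep at most
# `neg_pos_ratio` negatives per positive, chosen as the negatives with the highest
# classification loss, and fall back to zero when no negative has a non-zero loss.
import tensorflow as tf

def mine_hard_negatives(cls_loss, positives, negatives, neg_pos_ratio=3, n_neg_min=0):
    """cls_loss, positives, negatives: float tensors of shape [batch, n_boxes]."""
    batch_size, n_boxes = tf.shape(cls_loss)[0], tf.shape(cls_loss)[1]
    neg_loss_all = cls_loss * negatives
    n_positive = tf.reduce_sum(positives)
    n_neg_losses = tf.math.count_nonzero(neg_loss_all, dtype=tf.int32)
    k = tf.minimum(
        tf.maximum(neg_pos_ratio * tf.cast(n_positive, tf.int32), n_neg_min),
        n_neg_losses)

    def top_k_negatives():
        flat = tf.reshape(neg_loss_all, [-1])
        _, idx = tf.nn.top_k(flat, k=k, sorted=False)
        keep = tf.scatter_nd(tf.expand_dims(idx, 1),
                             tf.ones_like(idx, tf.float32), tf.shape(flat))
        return tf.reduce_sum(cls_loss * tf.reshape(keep, [batch_size, n_boxes]),
                             axis=-1)

    return tf.cond(tf.equal(n_neg_losses, 0),
                   lambda: tf.zeros([batch_size]), top_k_negatives)

# E.g. with one positive per row and neg_pos_ratio=3, at most 3 negatives per
# positive contribute to the returned per-batch-item negative loss.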
def build(): """Builds the Tensorflow graph.""" inputs, labels, lengths = None, None, None if mode in ('train', 'eval'): if isinstance(no_event_label, numbers.Number): label_shape = [] else: label_shape = [len(no_event_label)] inputs, labels, lengths = magenta.common.get_padded_batch( sequence_example_file_paths, hparams.batch_size, input_size, label_shape=label_shape, shuffle=mode == 'train') elif mode == 'generate': inputs = tf.placeholder(tf.float32, [hparams.batch_size, None, input_size]) if isinstance(encoder_decoder, magenta.music.OneHotIndexEventSequenceEncoderDecoder): expanded_inputs = tf.one_hot( tf.cast(tf.squeeze(inputs, axis=-1), tf.int64), encoder_decoder.input_depth) else: expanded_inputs = inputs dropout_keep_prob = 1.0 if mode == 'generate' else hparams.dropout_keep_prob if hparams.use_cudnn: outputs, initial_state, final_state = make_cudnn( expanded_inputs, hparams.rnn_layer_sizes, hparams.batch_size, mode, dropout_keep_prob=dropout_keep_prob, residual_connections=hparams.residual_connections) else: cell = make_rnn_cell( hparams.rnn_layer_sizes, dropout_keep_prob=dropout_keep_prob, attn_length=hparams.attn_length, residual_connections=hparams.residual_connections) initial_state = cell.zero_state(hparams.batch_size, tf.float32) outputs, final_state = tf.nn.dynamic_rnn( cell, inputs, sequence_length=lengths, initial_state=initial_state, swap_memory=True) outputs_flat = magenta.common.flatten_maybe_padded_sequences( outputs, lengths) if isinstance(num_classes, numbers.Number): num_logits = num_classes else: num_logits = sum(num_classes) logits_flat = contrib_layers.linear(outputs_flat, num_logits) if mode in ('train', 'eval'): labels_flat = magenta.common.flatten_maybe_padded_sequences( labels, lengths) if isinstance(num_classes, numbers.Number): softmax_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=labels_flat, logits=logits_flat) predictions_flat = tf.argmax(logits_flat, axis=1) else: logits_offsets = np.cumsum([0] + num_classes) softmax_cross_entropy = [] predictions = [] for i in range(len(num_classes)): softmax_cross_entropy.append( tf.nn.sparse_softmax_cross_entropy_with_logits( labels=labels_flat[:, i], logits=logits_flat[:, logits_offsets[i]: logits_offsets[i + 1]])) predictions.append( tf.argmax( logits_flat[:, logits_offsets[i]:logits_offsets[i + 1]], axis=1)) predictions_flat = tf.stack(predictions, 1) correct_predictions = tf.to_float( tf.equal(labels_flat, predictions_flat)) event_positions = tf.to_float( tf.not_equal(labels_flat, no_event_label)) no_event_positions = tf.to_float( tf.equal(labels_flat, no_event_label)) # Compute the total number of time steps across all sequences in the # batch. For some models this will be different from the number of RNN # steps. 
def batch_labels_to_num_steps(batch_labels, lengths): num_steps = 0 for labels, length in zip(batch_labels, lengths): num_steps += encoder_decoder.labels_to_num_steps( labels[:length]) return np.float32(num_steps) num_steps = tf.py_func(batch_labels_to_num_steps, [labels, lengths], tf.float32) if mode == 'train': loss = tf.reduce_mean(softmax_cross_entropy) perplexity = tf.exp(loss) accuracy = tf.reduce_mean(correct_predictions) event_accuracy = ( tf.reduce_sum(correct_predictions * event_positions) / tf.reduce_sum(event_positions)) no_event_accuracy = ( tf.reduce_sum(correct_predictions * no_event_positions) / tf.reduce_sum(no_event_positions)) loss_per_step = tf.reduce_sum( softmax_cross_entropy) / num_steps perplexity_per_step = tf.exp(loss_per_step) optimizer = tf.train.AdamOptimizer( learning_rate=hparams.learning_rate) train_op = contrib_slim.learning.create_train_op( loss, optimizer, clip_gradient_norm=hparams.clip_norm) tf.add_to_collection('train_op', train_op) vars_to_summarize = { 'loss': loss, 'metrics/perplexity': perplexity, 'metrics/accuracy': accuracy, 'metrics/event_accuracy': event_accuracy, 'metrics/no_event_accuracy': no_event_accuracy, 'metrics/loss_per_step': loss_per_step, 'metrics/perplexity_per_step': perplexity_per_step, } elif mode == 'eval': vars_to_summarize, update_ops = contrib_metrics.aggregate_metric_map( { 'loss': tf.metrics.mean(softmax_cross_entropy), 'metrics/accuracy': tf.metrics.accuracy(labels_flat, predictions_flat), 'metrics/per_class_accuracy': tf.metrics.mean_per_class_accuracy( labels_flat, predictions_flat, num_classes), 'metrics/event_accuracy': tf.metrics.recall(event_positions, correct_predictions), 'metrics/no_event_accuracy': tf.metrics.recall(no_event_positions, correct_predictions), 'metrics/loss_per_step': tf.metrics.mean(tf.reduce_sum(softmax_cross_entropy) / num_steps, weights=num_steps), }) for updates_op in update_ops.values(): tf.add_to_collection('eval_ops', updates_op) # Perplexity is just exp(loss) and doesn't need its own update op. vars_to_summarize['metrics/perplexity'] = tf.exp( vars_to_summarize['loss']) vars_to_summarize['metrics/perplexity_per_step'] = tf.exp( vars_to_summarize['metrics/loss_per_step']) for var_name, var_value in six.iteritems(vars_to_summarize): tf.summary.scalar(var_name, var_value) tf.add_to_collection(var_name, var_value) elif mode == 'generate': temperature = tf.placeholder(tf.float32, []) if isinstance(num_classes, numbers.Number): softmax_flat = tf.nn.softmax( tf.div(logits_flat, tf.fill([num_classes], temperature))) softmax = tf.reshape(softmax_flat, [hparams.batch_size, -1, num_classes]) else: logits_offsets = np.cumsum([0] + num_classes) softmax = [] for i in range(len(num_classes)): sm = tf.nn.softmax( tf.div( logits_flat[:, logits_offsets[i]:logits_offsets[i + 1]], tf.fill([num_classes[i]], temperature))) sm = tf.reshape(sm, [hparams.batch_size, -1, num_classes[i]]) softmax.append(sm) tf.add_to_collection('inputs', inputs) tf.add_to_collection('temperature', temperature) tf.add_to_collection('softmax', softmax) # Flatten state tuples for metagraph compatibility. for state in tf_nest.flatten(initial_state): tf.add_to_collection('initial_state', state) for state in tf_nest.flatten(final_state): tf.add_to_collection('final_state', state)
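# A hedged toy illustration (plain numpy) of the two perplexity metrics above:
# perplexity is exp of the mean cross-entropy over RNN steps, while the per-step
# variant renormalizes by the true number of event steps, which for some
# encodings differs from the number of labels.
import numpy as np

cross_entropy = np.array([0.2, 1.5, 0.7], dtype=np.float32)   # per-label losses
print(np.exp(cross_entropy.mean()))             # perplexity over RNN steps
num_steps = 2.0                                 # e.g. fewer musical steps than labels
print(np.exp(cross_entropy.sum() / num_steps))  # perplexity per event step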
def cond(i, base_state, high_states, prev_y, prev_emb, y_array): return tf.logical_and( tf.less(i, self.translation_maxlen), tf.reduce_any(tf.not_equal(prev_y, 0)))
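# A hedged, simplified two-variable version of the decoding-loop condition above,
# assuming id 0 is the padding/EOS id: the tf.while_loop stops as soon as every
# sequence has emitted 0 or the maximum length is reached.
import tensorflow as tf

translation_maxlen = 5

def cond(i, prev_y):
    return tf.logical_and(tf.less(i, translation_maxlen),
                          tf.reduce_any(tf.not_equal(prev_y, 0)))

def body(i, prev_y):
    # Toy "decoder": every sequence emits id 0 from step 2 onwards.
    next_y = tf.cond(i < 2, lambda: tf.ones_like(prev_y),
                     lambda: tf.zeros_like(prev_y))
    return i + 1, next_y

i, y = tf.while_loop(cond, body, [tf.constant(0), tf.constant([1, 1, 1])])
print(int(i))  # 3: the loop exits once all sequences have emitted 0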
def model_fn(features, targets, mode): """Creates the prediction, loss, and train ops. Args: features: A dictionary of tensors keyed by the feature name. targets: A tensor representing the labels (targets). mode: The execution mode, as defined in tf.contrib.learn.ModeKeys. Returns: A tuple consisting of the prediction, loss, and train_op. """ # Deep-copy the model hparams between modes to eliminate # side-effects caused by abuse of the linked problem_hparams # objects which are used to share modality objects between # problems. We do not want to share the modality objects between # modes, since the modality objects may decide to do something # mode-specific. A better fix would be to stop abusing the # hparams in this way and instead use a separate dictionary to # share the modality objects between problems. This dictionary # could be created once per mode and passed to the constructor of # t2t_model. my_hp = copy.deepcopy(hparams) if mode == tf.contrib.learn.ModeKeys.INFER: if FLAGS.decode_interactive: features = _interactive_input_tensor_to_features_dict( features, my_hp) elif FLAGS.decode_from_file: features = _decode_input_tensor_to_features_dict( features, my_hp) # A dictionary containing: # - problem_choice: A Tensor containing an integer indicating which problem # was selected for this run. # - predictions: A Tensor containing the model's output predictions. run_info = dict() run_info["problem_choice"] = features["problem_choice"] if targets is not None: features["targets"] = targets dp = devices.data_parallelism() # Add input statistics for incoming features. with tf.name_scope("input_stats"): for (k, v) in six.iteritems(features): if isinstance(v, tf.Tensor) and v.get_shape().ndims > 1: tf.summary.scalar("%s_batch" % k, tf.shape(v)[0] // dp.n) tf.summary.scalar("%s_length" % k, tf.shape(v)[1]) nonpadding = tf.to_float(tf.not_equal(v, 0)) tf.summary.scalar("%s_nonpadding_tokens" % k, tf.reduce_sum(nonpadding)) tf.summary.scalar("%s_nonpadding_fraction" % k, tf.reduce_mean(nonpadding)) tf.get_variable_scope().set_initializer(initializer()) train = mode == tf.contrib.learn.ModeKeys.TRAIN # Get multi-problem logits and loss based on features["problem_choice"]. loss_variable_names = [] def nth_model(n): """Build the model for the n-th problem, plus some added variables.""" model_class = registry.model(model)( my_hp, mode, my_hp.problems[n], n, dp, devices.ps_devices(all_workers=True)) if mode == tf.contrib.learn.ModeKeys.INFER: return model_class.infer( features, beam_size=FLAGS.decode_beam_size, top_beams=(FLAGS.decode_beam_size if FLAGS.decode_return_beams else 1), last_position_only=FLAGS.decode_use_last_position_only, alpha=FLAGS.decode_alpha, decode_length=FLAGS.decode_extra_length) # In distributed mode, we build graph for problem=0 and problem=worker_id. skipping_is_on = my_hp.problem_choice == "distributed" and train problem_worker_id = FLAGS.worker_id % len(my_hp.problems) skip_this_one = n != 0 and n % FLAGS.worker_replicas != problem_worker_id # On worker 0 also build graph for problems <= 1. # TODO(lukaszkaiser): why is this hack needed for variables init? Repair. 
skip_this_one = skip_this_one and (FLAGS.worker_id != 0 or n > 1) if (FLAGS.eval_run_autoregressive and mode == tf.contrib.learn.ModeKeys.EVAL): sharded_logits, losses_dict = model_class.eval_autoregressive( features) else: sharded_logits, losses_dict = model_class.model_fn( features, skip=(skipping_is_on and skip_this_one)) with tf.variable_scope("losses_avg"): total_loss, ops = 0.0, [] for loss_key, loss_value in six.iteritems(losses_dict): loss_name = "problem_%d/%s_loss" % (n, loss_key) loss_moving_avg = tf.get_variable(loss_name, initializer=100.0, trainable=False) loss_variable_names.append(loss_name) ops.append( loss_moving_avg.assign(loss_moving_avg * 0.9 + loss_value * 0.1)) total_loss += loss_value with tf.variable_scope(tf.get_variable_scope(), reuse=True): # Total loss was already constructed on input. loss_moving_avg = tf.get_variable("problem_%d/total_loss" % n) ops.append( loss_moving_avg.assign(loss_moving_avg * 0.9 + total_loss * 0.1)) with tf.variable_scope( "train_stats"): # Count steps for this problem. problem_steps = tf.get_variable("problem_%d_steps" % n, initializer=0, trainable=False) ops.append(problem_steps.assign_add(1)) with tf.control_dependencies(ops): # Make sure the ops run. # Ensure the loss is a scalar here. total_loss = tf.reshape(total_loss, [], name="total_loss_control_id") return [total_loss ] + sharded_logits # Need to flatten for cond later. result_list = input_fn_builder.cond_on_index( nth_model, features["problem_choice"], 0, len(my_hp.problems) - 1) if mode == tf.contrib.learn.ModeKeys.INFER: # Beam search in a sequence model returns both decodes, with the key "outputs", # and scores, with the key "scores". If the return value is a dict, we expect it # to have the key "outputs", a tensor of int32, and "scores", a tensor of floats. # This is useful if we want to return scores from estimator.predict. if not isinstance(result_list, dict): ret = {"outputs": result_list}, None, None else: ret = { "outputs": result_list["outputs"], "scores": result_list["scores"] }, None, None if "inputs" in features: ret[0]["inputs"] = features["inputs"] if "infer_targets" in features: ret[0]["targets"] = features["infer_targets"] return ret sharded_logits, total_loss = result_list[1:], result_list[0] if mode == tf.contrib.learn.ModeKeys.EVAL: logits = tf.concat(sharded_logits, 0) if FLAGS.eval_print: logits = tf.Print(logits, [features["inputs"], logits], "EVAL PRINT", summarize=10000) # For evaluation, return the logits layer as our predictions. run_info["predictions"] = logits train_op = None return run_info, total_loss, None assert mode == tf.contrib.learn.ModeKeys.TRAIN # Some training statistics.
with tf.name_scope("training_stats"): learning_rate = my_hp.learning_rate * learning_rate_decay() learning_rate /= math.sqrt(float(FLAGS.worker_replicas)) tf.summary.scalar("learning_rate", learning_rate) global_step = tf.to_float(tf.contrib.framework.get_global_step()) for n in xrange(len(my_hp.problems)): names_and_vars = [] with tf.variable_scope("losses_avg", reuse=True): total_loss_var = tf.get_variable("problem_%d/total_loss" % n) names_and_vars.append(("total_loss", total_loss_var)) with tf.variable_scope("losses_avg", reuse=True): for loss_name in loss_variable_names: if loss_name.startswith("problem_%d/" % n): loss_var = tf.get_variable(loss_name) loss_suffix = loss_name[loss_name.index("/") + 1:] names_and_vars.append((loss_suffix, loss_var)) for (loss_name, loss_var) in names_and_vars: tf.summary.scalar("loss_avg_%d/%s" % (n, loss_name), loss_var) with tf.variable_scope("train_stats", reuse=True): nth_steps = tf.get_variable("problem_%d_steps" % n, dtype=tf.int32) tf.summary.scalar("problem_%d_frequency" % n, tf.to_float(nth_steps) / (global_step + 1.0)) # Log trainable weights and add decay. total_size, weight_decay_loss = 0, 0.0 all_weights = {v.name: v for v in tf.trainable_variables()} for v_name in sorted(list(all_weights)): v = all_weights[v_name] v_size = int(np.prod(np.array(v.shape.as_list()))) tf.logging.info("Weight %s\tshape %s\tsize %d", v.name[:-2].ljust(80), str(v.shape).ljust(20), v_size) total_size += v_size if my_hp.weight_decay > 0.0 and len(v.shape.as_list()) > 1: # Add weight regularization if set and the weight is not a bias (dim>1). with tf.device(v._ref().device): # pylint: disable=protected-access v_loss = tf.nn.l2_loss(v) / v_size weight_decay_loss += v_loss is_body = len(v_name) > 5 and v_name[:5] == "body/" if my_hp.weight_noise > 0.0 and is_body: # Add weight noise if set in my_hp. with tf.device(v._ref().device): # pylint: disable=protected-access scale = learning_rate * 0.001 noise = tf.truncated_normal( v.shape) * my_hp.weight_noise * scale noise_op = v.assign_add(noise) with tf.control_dependencies([noise_op]): total_loss = tf.identity(total_loss) tf.logging.info("Total trainable variables size: %d", total_size) if my_hp.weight_decay > 0.0: total_loss += weight_decay_loss * my_hp.weight_decay total_loss = tf.identity(total_loss, name="total_loss") # Define the train_op for the TRAIN mode. opt = _ConditionalOptimizer(my_hp.optimizer, learning_rate, my_hp) tf.logging.info("Computing gradients for global model_fn.") opt_summaries = ["learning_rate", "loss"] if hparams.summarize_grads: opt_summaries.extend(["gradients", "gradient_norm"]) train_op = tf.contrib.layers.optimize_loss( name="training", loss=total_loss, global_step=tf.contrib.framework.get_global_step(), learning_rate=learning_rate, clip_gradients=my_hp.clip_grad_norm or None, gradient_noise_scale=hparams.grad_noise_scale or None, optimizer=opt, summaries=opt_summaries, colocate_gradients_with_ops=True) # Remove summaries that will fail to run because they are in conditionals. # TODO(cwhipkey): Test with this code removed, later in 2017. summaries = tf.get_collection_ref(tf.GraphKeys.SUMMARIES) for i in range(len(summaries) - 1, -1, -1): if summaries[i].name.startswith("cond_"): del summaries[i] tf.logging.info("Global model_fn finished.") return run_info, total_loss, train_op
def encoding_graph_mt(encoder_output_src, features, mode, params): if mode != "train": params.residual_dropout = 0.0 params.attention_dropout = 0.0 params.relu_dropout = 0.0 params.label_smoothing = 0.0 n = params.sc_num batch_size = tf.shape(features["source"])[0] dtype = tf.get_variable_scope().dtype hidden_size = params.hidden_size src_mask = tf.sequence_mask(features["source_length"], maxlen=tf.shape(features["source"])[1], dtype=dtype or tf.float32) src_attn_bias = layers.attention.attention_bias(src_mask, "masking", dtype=dtype) max_len = 0 mt_seqs = [] for i in range(n): mt_seqs.append(features["mt_%d" % i]) max_len = tf.maximum(max_len, tf.shape(mt_seqs[i])[1]) for i in range(n): mt_seqs[i] = tf.concat([mt_seqs[i], tf.zeros([batch_size, max_len-tf.shape(mt_seqs[i])[1]], dtype=tf.int32)], axis=1) mt_lens = [] for i in range(n): mt_lens.append(features["mt_length_%d" % i]) mt_seq = tf.concat(mt_seqs, axis=0) mt_len = tf.concat(mt_lens, axis=0) mt_mask = tf.sequence_mask(mt_len, maxlen=max_len, dtype=dtype or tf.float32) tvocab = params.vocabulary["target"] tgt_vocab_size = len(tvocab) initializer = tf.random_normal_initializer(0.0, params.hidden_size ** -0.5) if params.shared_source_target_embedding: with tf.variable_scope(tf.get_variable_scope(), reuse=True): tgt_embedding = tf.get_variable("weights", [tgt_vocab_size, hidden_size], initializer=initializer) else: tgt_embedding = tf.get_variable("target_embedding", [tgt_vocab_size, hidden_size], initializer=initializer) bias = tf.get_variable("mt_bias", [hidden_size]) inputs = tf.gather(tgt_embedding, mt_seq) if params.multiply_embedding_mode == "sqrt_depth": inputs = inputs * (hidden_size ** 0.5) inputs = inputs * tf.expand_dims(mt_mask, -1) encoder_input = tf.nn.bias_add(inputs, bias) enc_attn_bias = layers.attention.attention_bias(mt_mask, "masking", dtype=dtype) if params.position_info_type == 'absolute': encoder_input = layers.attention.add_timing_signal(encoder_input) if params.residual_dropout: keep_prob = 1.0 - params.residual_dropout encoder_input = tf.nn.dropout(encoder_input, keep_prob) all_layer_outputs = transformer_encoder(encoder_input, enc_attn_bias, params, scope="mt_encoder", get_all_layer=True, memory_src=encoder_output_src, mem_bias_src=src_attn_bias) all_layer_outputs = tf.stack(all_layer_outputs, axis=1) # (bs, nl, lk, hs) # mt_seq = tf.reshape(mt_seq, [n, batch_size, max_len]) mt_seq = tf.transpose(mt_seq, [1, 0, 2]) mt_seq = tf.reshape(mt_seq, [batch_size, n*max_len]) all_layer_outputs = tf.reshape(all_layer_outputs, [n, batch_size, params.num_encoder_layers, max_len, hidden_size]) all_layer_outputs = tf.transpose(all_layer_outputs, [1, 2, 0, 3, 4]) all_layer_outputs = tf.reshape(all_layer_outputs, [batch_size, params.num_encoder_layers, n*max_len, hidden_size]) encoder_output = all_layer_outputs[:,-1,:,:] # [bs, n*lk, hs] mt_mask = tf.reshape(mt_mask, [n, batch_size, max_len]) mt_mask = tf.transpose(mt_mask, [1, 0, 2]) mt_mask = tf.reshape(mt_mask, [batch_size, n*max_len]) enc_attn_bias = layers.attention.attention_bias(mt_mask, "masking", dtype=dtype) # (bs, 1, 1, lk) dot_product = tf.matmul(all_layer_outputs, all_layer_outputs, transpose_b=True) # (bs, nl, lk, lk) dot_sim = dot_product * (hidden_size ** -0.5) # (bs, nl, lk, lk) dot_sim = dot_sim + enc_attn_bias dot_sim = tf.reshape(dot_sim, [batch_size, params.num_encoder_layers, n*max_len, n, max_len]) dot_sim = tf.nn.softmax(dot_sim, axis=-1) dot_sim = tf.reshape(dot_sim, [batch_size, params.num_encoder_layers, n*max_len, n*max_len]) # (bs, nl, lk, lk) 
similarity = dot_sim tag_vector = tf.concat([tf.ones([max_len, 1])*i for i in range(n)], axis=0) # (lk, 1) mt_mt_mask = tag_vector - tf.transpose(tag_vector) # (lk, lk) mt_mt_mask = tf.cast(tf.not_equal(mt_mt_mask, 0), tf.float32) # (lk, lk) mt_mt_mask = tf.expand_dims(mt_mt_mask, axis=0) # (1, lk, lk) mt_mt_mask = mt_mt_mask * tf.expand_dims(mt_mask, axis=1) # (bs, lk, lk) mt_mt_mask = tf.expand_dims(mt_mt_mask, axis=1) # (bs, 1, lk, lk) similarity = similarity * mt_mt_mask # (bs, nl, lk, lk) # return mt_seq, encoder_output, enc_attn_bias, similarity
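# A hedged toy reproduction of the `mt_mt_mask` construction above with n=2
# hypotheses of max_len=3: positions from the same MT hypothesis get 0 and
# cross-hypothesis positions get 1, so the similarity matrix only keeps
# cross-hypothesis entries.
import tensorflow as tf

n, max_len = 2, 3
tag_vector = tf.concat([tf.ones([max_len, 1]) * i for i in range(n)], axis=0)
mt_mt_mask = tag_vector - tf.transpose(tag_vector)             # (n*len, n*len)
mt_mt_mask = tf.cast(tf.not_equal(mt_mt_mask, 0), tf.float32)
print(mt_mt_mask.numpy())  # 3x3 zero blocks on the diagonal, ones off-diagonal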
reg = tf.reduce_mean((tf.abs(tf.gather(logit, tf.where(tf.equal(0, rem))[:, 0]) - tf.gather(logit, tf.where(tf.equal(1, rem))[:, 0])) - tf.gather(logit, tf.where(tf.equal(2, rem))[:, 0]))**2) #reg = tf.reduce_mean(tf.abs(tf.abs(tf.gather(logit, tf.where(tf.equal(0, rem))[:, 0]) - #tf.gather(logit, tf.where(tf.equal(1, rem))[:, 0])) - #tf.gather(logit, tf.where(tf.equal(2, rem))[:, 0]))) loss = tf.losses.sigmoid_cross_entropy([[1]], logit) triplet_loss = tf.losses.sigmoid_cross_entropy( [[1]], tf.gather( logit, tf.where(tf.not_equal( 2, rem))[:, 0])) #* 1/i_f #+ tf.losses.get_regularization_loss() learning_rate = tf.placeholder(tf.float32, [], name="learning_rate") trainer = tf.train.AdamOptimizer(learning_rate) grads_and_vars = trainer.compute_gradients(loss) train_op = trainer.apply_gradients(grads_and_vars) grads_and_vars_if = trainer.compute_gradients(loss - lambda_ * i_f) train_op_if = trainer.apply_gradients(grads_and_vars_if) grads_and_vars_if_sq = trainer.compute_gradients(loss + lambda_ * (1 - i_f)**2) train_op_if_sq = trainer.apply_gradients(grads_and_vars_if_sq) grads_and_vars_triplets = trainer.compute_gradients(triplet_loss +
def predict_all_labels(embedding, num_clusters, kmeans_iterations, prototype_features, prototype_semantic_labels, prototype_instance_labels, k_in_nearest_neighbors, panoptic_label_divisor, class_has_instances_list): """Predicts panoptic, semantic, and instance labels using the vMF embedding. Args: embedding: A 4-D float tensor with shape `[batch, height, width, embedding_dim]`. num_clusters: A list of 2 integers for number of clusters in y and x axes. kmeans_iterations: Number of iterations for the k-means clustering. prototype_features: A 2-D float tensor for trained prototype features with shape `[num_prototypes, embedding_dim]`. prototype_semantic_labels: A 1-D integer tensor for trained prototype semantic labels with length `[num_prototypes]`. prototype_instance_labels: A 1-D integer tensor for trained prototype instance labels with length `[num_prototypes]`. k_in_nearest_neighbors: The number of nearest neighbors to search, or k in k-nearest neighbors. panoptic_label_divisor: An integer constant to separate semantic and instance labels from panoptic labels. class_has_instances_list: A list of thing classes, which have instances. Returns: panoptic_predictions: A 1-D integer tensor for pixel panoptic predictions. semantic_predictions: A 1-D integer tensor for pixel semantic predictions. instance_predictions: A 1-D integer tensor for pixel instance predictions. """ # Generate location features and combine them with embedding features. shape = embedding.get_shape().as_list() location_features = common_utils.generate_location_features( [shape[1], shape[2]], 'float') location_features = tf.expand_dims(location_features, 0) embedding_with_location = tf.concat([embedding, location_features], 3) embedding_with_location = common_utils.normalize_embedding( embedding_with_location) # Kmeans clustering. cluster_labels = common_utils.kmeans(embedding_with_location, num_clusters, kmeans_iterations) test_prototypes = common_utils.calculate_prototypes_from_labels( embedding, cluster_labels) # Predict semantic and instance labels. semantic_predictions, instance_predictions = predict_semantic_instance_labels( cluster_labels, test_prototypes, prototype_features, prototype_semantic_labels, prototype_instance_labels, k_in_nearest_neighbors) # Refine instance labels. class_has_instances_list = tf.reshape(class_has_instances_list, [1, 1, 1, -1]) instance_predictions = tf.where( tf.reduce_all(tf.not_equal(tf.expand_dims(semantic_predictions, 3), class_has_instances_list), axis=3), tf.zeros_like(instance_predictions), instance_predictions) # Combine semantic and panoptic predictions as panoptic predictions. panoptic_predictions = (semantic_predictions * panoptic_label_divisor + instance_predictions) return (panoptic_predictions, semantic_predictions, instance_predictions, cluster_labels)
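# A hedged toy run of the instance-label refinement above: instance ids are
# cleared wherever the predicted semantic class is not a "thing" class
# (here classes 2 and 3 have instances, class 1 does not).
import tensorflow as tf

semantic = tf.constant([[[1, 2], [3, 2]]])                 # [batch, h, w]
instance = tf.constant([[[7, 8], [9, 4]]])
class_has_instances_list = tf.reshape(tf.constant([2, 3]), [1, 1, 1, -1])
is_stuff = tf.reduce_all(
    tf.not_equal(tf.expand_dims(semantic, 3), class_has_instances_list), axis=3)
print(tf.where(is_stuff, tf.zeros_like(instance), instance).numpy())
# [[[0 8] [9 4]]] -- class 1 is "stuff", so its instance id becomes 0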
def test_top_k_top_p_filtering(self): logits = tf.convert_to_tensor( [ [ 8.2220991, # 3rd highest value; idx. 0 -0.5620044, 5.23229752, 4.0386393, -6.8798378, -0.54785802, -3.2012153, 2.92777176, 1.88171953, 7.35341276, # 5th highest value; idx. 9 8.43207833, # 2nd highest value; idx. 10 -9.85711836, -5.96209236, -1.13039161, -7.1115294, -0.8369633, -5.3186408, 7.06427407, 0.81369344, -0.82023817, -5.9179796, 0.58813443, -6.99778438, 4.71551189, -0.18771637, 7.44020759, # 4th highest value; idx. 25 9.38450987, # 1st highest value; idx. 26 2.12662941, -9.32562038, 2.35652522, ], # cummulative prob of 5 highest values <= 0.6 [ 0.58425518, 4.53139238, -5.57510464, -6.28030699, -7.19529503, -4.02122551, 1.39337037, -6.06707057, 1.59480517, -9.643119, 0.03907799, 0.67231762, -8.88206726, 6.27115922, # 4th highest value; idx. 13 2.28520723, 4.82767506, 4.30421368, 8.8275313, # 2nd highest value; idx. 17 5.44029958, # 5th highest value; idx. 18 -4.4735794, 7.38579536, # 3rd highest value; idx. 20 -2.91051663, 2.61946077, -2.5674762, -9.48959302, -4.02922645, -1.35416918, 9.67702323, # 1st highest value; idx. 27 -5.89478553, 1.85370467, ], # cummulative prob of 5 highest values <= 0.6 ], dtype=tf.float32, ) non_inf_expected_idx = tf.convert_to_tensor( [[0, 0], [0, 9], [0, 10], [0, 25], [0, 26], [1, 13], [1, 17], [1, 18], [1, 20], [1, 27]], dtype=tf.int32, ) # expected non filtered idx as noted above non_inf_expected_output = tf.convert_to_tensor( [8.222099, 7.3534126, 8.432078, 7.4402075, 9.38451, 6.271159, 8.827531, 5.4402995, 7.3857956, 9.677023], dtype=tf.float32, ) # expected non filtered values as noted above output = tf_top_k_top_p_filtering(logits, top_k=10, top_p=0.6, min_tokens_to_keep=4) non_inf_output = output[output != -float("inf")] non_inf_idx = tf.cast( tf.where(tf.not_equal(output, tf.constant(-float("inf"), dtype=tf.float32))), dtype=tf.int32, ) tf.debugging.assert_near(non_inf_output, non_inf_expected_output, rtol=1e-12) tf.debugging.assert_equal(non_inf_idx, non_inf_expected_idx)
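# A hedged sketch of the top-k half of what the test above exercises; this is
# illustrative and not the `tf_top_k_top_p_filtering` implementation itself:
# logits below the k-th largest value are pushed to -inf so sampling can never
# pick them.
import tensorflow as tf

def top_k_filter(logits, top_k):
    kth_largest = tf.math.top_k(logits, k=top_k).values[..., -1:]
    return tf.where(logits < kth_largest,
                    tf.fill(tf.shape(logits), -float("inf")), logits)

logits = tf.constant([[1.0, 3.0, 2.0, 0.5]])
print(top_k_filter(logits, top_k=2).numpy())  # [[-inf 3. 2. -inf]]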
def build_simple_vte_model_relu_hi(premise_input, hypothesis_input, img_features_input, dropout_input, num_tokens, num_labels, embeddings, embeddings_size, train_embeddings, rnn_hidden_size, multimodal_fusion_hidden_size, classification_hidden_size): premise_length = tf.cast( tf.reduce_sum( tf.cast(tf.not_equal(premise_input, tf.zeros_like(premise_input, dtype=tf.int32)), tf.int64), 1 ), tf.int32 ) hypothesis_length = tf.cast( tf.reduce_sum( tf.cast(tf.not_equal(hypothesis_input, tf.zeros_like(hypothesis_input, dtype=tf.int32)), tf.int64), 1 ), tf.int32 ) if embeddings is not None: embedding_matrix = tf.get_variable( "embedding_matrix", shape=(num_tokens, embeddings_size), initializer=glove_embeddings_initializer(embeddings), trainable=train_embeddings ) print("Loaded GloVe embeddings!") else: embedding_matrix = tf.get_variable( "embedding_matrix", shape=(num_tokens, embeddings_size), initializer=tf.random_normal_initializer(stddev=0.05), trainable=train_embeddings ) hypothesis_embeddings = tf.nn.embedding_lookup(embedding_matrix, hypothesis_input) lstm_cell = DropoutWrapper( tf.nn.rnn_cell.LSTMCell(rnn_hidden_size), input_keep_prob=dropout_input, output_keep_prob=dropout_input ) hypothesis_outputs, hypothesis_final_states = tf.nn.dynamic_rnn( cell=lstm_cell, inputs=hypothesis_embeddings, sequence_length=hypothesis_length, dtype=tf.float32 ) normalized_img_features = tf.nn.l2_normalize(img_features_input, dim=1) img_hidden_layer = tf.nn.dropout( tf.contrib.layers.fully_connected(normalized_img_features, multimodal_fusion_hidden_size), keep_prob=dropout_input ) hypothesis_hidden_layer = tf.nn.dropout( tf.contrib.layers.fully_connected(hypothesis_final_states.h, multimodal_fusion_hidden_size), keep_prob=dropout_input ) hypothesis_img_multimodal_fusion = tf.multiply(hypothesis_hidden_layer, img_hidden_layer) first_layer = tf.nn.dropout( tf.contrib.layers.fully_connected(hypothesis_img_multimodal_fusion, classification_hidden_size), keep_prob=dropout_input ) second_layer = tf.nn.dropout( tf.contrib.layers.fully_connected(first_layer, classification_hidden_size), keep_prob=dropout_input ) third_layer = tf.nn.dropout( tf.contrib.layers.fully_connected(second_layer, classification_hidden_size), keep_prob=dropout_input ) return tf.contrib.layers.fully_connected( third_layer, num_labels, activation_fn=None )
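# A hedged toy check of the sequence-length computation above: non-zero token
# ids are counted per row, so id 0 must be reserved for padding.
import tensorflow as tf

premise_input = tf.constant([[4, 9, 7, 0, 0],
                             [5, 0, 0, 0, 0]], dtype=tf.int32)
length = tf.cast(
    tf.reduce_sum(
        tf.cast(tf.not_equal(premise_input,
                             tf.zeros_like(premise_input, dtype=tf.int32)),
                tf.int64), 1),
    tf.int32)
print(length.numpy())  # [3 1]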
def train_batch(self, source_charseq_ids, source_charseqs, target_charseq_ids, target_charseqs): # TODO: Modify target_charseqs by appending EOW; only the version with appended EOW is used from now on. print("Train batch called") target_charseqs = self._append_eow(target_charseqs) with tf.GradientTape() as tape: # TODO: Embed source charseqs embedded = self._model.source_embeddings(source_charseqs) # TODO: Run self._model.source_rnn on the embedded sequences, returning outputs in `source_states`. source_states = self._model.source_rnn(embedded) # Copy the source_states to corresponding batch places, and then flatten it source_mask = tf.not_equal(source_charseq_ids, 0) source_states = tf.boolean_mask(tf.gather(source_states, source_charseq_ids), source_mask) targets = tf.boolean_mask(tf.gather(target_charseqs, target_charseq_ids), source_mask) # tape.watch(self._model.variables) class DecoderTraining(decoder.BaseDecoder): @property def batch_size(self): # TODO: Return batch size of self._source_states, using tf.shape return tf.shape(self._source_states)[0] @property def output_size(self): # TODO: Return number of the generated logits return tf.shape(targets)[1] @property def output_dtype(self): # TODO: Return the type of the generated logits return tf.float32 def initialize(self, layer_inputs, initial_state=None, **kwargs): self._model, self._source_states, self._targets = layer_inputs # TODO: Define `finished` as a vector of self.batch_size of `False` [see tf.fill]. # TODO: Define `inputs` as a vector of self.batch_size of MorphoDataset.Factor.BOW [see tf.fill], # embedded using self._model.target_embedding # TODO: Define `states` as self._source_states finished = tf.fill([self.batch_size],False) inputs = self._model.target_embedding(tf.fill([self.batch_size],MorphoDataset.Factor.BOW)) states = self._source_states return finished, inputs, states def step(self, time, inputs, states): # TODO: Pass `inputs` and `[states]` through self._model.target_rnn_cell, generating # `outputs, [states]`. # TODO: Overwrite `outputs` by passing them through self._model.target_output_layer, # TODO: Define `next_inputs` by embedding `time`-th words from `self._targets`. # TODO: Define `finished` as True if `time`-th word from `self._targets` is EOW, False otherwise. # Again, no == or !=. outputs, [states] = self._model.target_rnn_cell(inputs=inputs,states=[states]) outputs = self._model.target_output_layer(outputs) next_inputs = self._model.target_embedding(self._targets[:, time]) finished = tf.equal(self._targets[:, time], MorphoDataset.Factor.EOW) return outputs, states, next_inputs, finished output_layer, _, _ = DecoderTraining()([self._model, source_states, targets]) # print(self._model.variables) # TODO: Compute loss. Use only nonzero `targets` as a mask. mask = tf.not_equal(targets,0) loss = self._loss(targets,output_layer,mask) gradients = tape.gradient(loss, self._model.variables) self._optimizer.apply_gradients(zip(gradients, self._model.variables)) tf.summary.experimental.set_step(self._optimizer.iterations) with self._writer.as_default(): for name, metric in self._metrics_training.items(): metric.reset_states() if name == "loss": metric(loss) else: metric(targets, output_layer, tf.not_equal(targets, 0)) tf.summary.scalar("train/{}".format(name), metric.result()) predictions = tf.math.argmax(output_layer, axis=2) return predictions
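# A hedged illustration of the masked loss at the end of `train_batch`, assuming
# `self._loss` behaves like SparseCategoricalCrossentropy with sample weights:
# padding positions (target id 0) contribute nothing to the loss.
import tensorflow as tf

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
targets = tf.constant([[2, 1, 0]])            # trailing 0 is padding
logits = tf.random.normal([1, 3, 4])          # [batch, time, n_chars]
mask = tf.cast(tf.not_equal(targets, 0), tf.float32)
print(float(loss_fn(targets, logits, sample_weight=mask)))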
def symbols_to_logits_fn(ids, i, cache): """Generate logits for next potential IDs. Args: ids: Current decoded sequences. int tensor with shape [batch_size * beam_size, i + 1]. i: Loop index. cache: dictionary of values storing the encoder output, encoder-decoder attention bias, and previous decoder attention values. Returns: Tuple of (logits with shape [batch_size * beam_size, vocab_size], updated cache values) """ # Set decoder input to the last generated IDs decoder_input = ids[:, -1:] # Preprocess decoder input by getting embeddings and adding timing signal. # decoder_input = self.embedding_softmax_layer(decoder_input) source_decoder_input = decoder_input decoder_input = self.embedding_lookup(decoder_input) embedding_mask = tf.cast( tf.not_equal(source_decoder_input, 0), self.embedding_lookup.embeddings.dtype) decoder_input *= tf.expand_dims(embedding_mask, -1) if self._padded_decode: timing_signal_shape = timing_signal.shape.as_list() decoder_input += tf.slice(timing_signal, [i, 0], [1, timing_signal_shape[1]]) bias_shape = decoder_self_attention_bias.shape.as_list() self_attention_bias = tf.slice( decoder_self_attention_bias, [0, 0, i, 0], [bias_shape[0], bias_shape[1], 1, bias_shape[3]]) else: decoder_input += timing_signal[i:i + 1] self_attention_bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1] decoder_shape = tf_utils.get_shape_list(decoder_input, expected_rank=3) batch_size = decoder_shape[0] decoder_length = decoder_shape[1] attention_bias = cache.get("encoder_decoder_attention_bias") attention_bias = tf.where(attention_bias < 0, tf.zeros_like(attention_bias), tf.ones_like(attention_bias)) attention_bias = tf.squeeze(attention_bias, axis=[1]) attention_mask = tf.tile(attention_bias, [1, decoder_length, 1]) self_attention_bias = tf.where(self_attention_bias < 0, tf.zeros_like(self_attention_bias), tf.ones_like(self_attention_bias)) self_attention_bias = tf.squeeze(self_attention_bias, axis=[1]) self_attention_mask = tf.tile(self_attention_bias, [batch_size, 1, 1]) decoder_outputs = self.decoder_layer( decoder_input, cache.get("encoder_outputs"), memory_mask=self_attention_mask, target_mask=attention_mask, cache=cache, decode_loop_step=i if self._padded_decode else None) logits = embedding_linear(self.embedding_lookup.embeddings, decoder_outputs) logits = tf.squeeze(logits, axis=[1]) return logits, cache
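# A hedged sketch of the embedding-masking step near the top of
# `symbols_to_logits_fn`: embeddings of padding ids (0) are zeroed out so padded
# positions contribute nothing downstream. Shapes are made up.
import tensorflow as tf

ids = tf.constant([[3, 0]])                   # 0 = padding id
table = tf.random.normal([10, 8])             # [vocab, hidden]
emb = tf.gather(table, ids)                   # [1, 2, 8]
embedding_mask = tf.cast(tf.not_equal(ids, 0), emb.dtype)
emb *= tf.expand_dims(embedding_mask, -1)
print(tf.reduce_sum(tf.abs(emb), axis=-1).numpy())  # second position is 0.0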
def model_fn(self, features: Dict[str, tf.Tensor], labels: Dict[str, tf.Tensor], mode: tf.estimator.ModeKeys, params: Dict[str, Any]) -> tf.estimator.EstimatorSpec: """Model creation function for a GeneNet segmentation network. Args: features (Dict[str, tf.Tensor]): Dictionary of input Tensors. labels (Dict[str, tf.Tensor]): Dictionary of label Tensors. mode (tf.estimator.ModeKeys): Estimator mode. params (Dict[str, Any]): Additional model hyperparameters. Returns: (tf.estimator.EstimatorSpec): GeneNet network EstimatorSpec. """ logger.debug(f'Creating a model_fn on device {self.device}') with tf.device(self.device): # Get batch size from input shape batch_size = tf.shape(features['input'])[0] # Build from the GeneGraph. `tensor_map` maps `self.gene_graph` # Genes, as well as the special strings 'input', 'classes', # and 'probabilities' to TensorFlow Tensors. with tf.variable_scope(self.name): tensor_map = self.gene_graph.build(features, mode) # Update the trainable parameter count self.n_trainable_params = int( np.sum([ np.prod(v.get_shape().as_list()) for v in tf.trainable_variables(self.name) ])) if params['make_summaries']: # If True, attach variable summaries to each ConvolutionGene for key in tensor_map: if isinstance(key, ConvolutionGene): variable_summary(tensor_map[key]) # Get the output from the last gene in the gene graph's `genes` # OrderedDict output_gene = list(self.gene_graph.genes.values())[-1] n_classes = output_gene.n_classes # Logits are the output of the final PredictorGene in the GeneGraph logits = tensor_map[output_gene] logger.debug(f'Received logits with shape {logits.get_shape()}') if mode != tf.estimator.ModeKeys.PREDICT: logger.debug(f'Received labels with shape ' f'{labels["label"].get_shape()}') with tf.name_scope('classes'): classes = tf.argmax(input=logits, axis=1, name='classes') tensor_map['classes'] = classes with tf.name_scope('probabilities'): probabilities = tf.nn.softmax(logits, axis=1, name='probabilities') tensor_map['probabilities'] = probabilities # Both predictions (for PREDICT and EVAL modes) predictions = {'classes': classes, 'probabilities': probabilities} # Create summary ops for the tensor associated with each Gene or # str in params['image_settings']. image_settings: Sequence[ImageSettings] = [] if 'image_settings' in params: image_settings = params['image_settings'] for key, summary_params in image_settings: # Get the tensor associated with each key target_tensor = tensor_map[key] # Create an image summary image_summary(target_tensor, summary_params) # For a forward pass, no need to build optimization ops if mode == tf.estimator.ModeKeys.PREDICT: # Add the corner feature to predictions, for easy reconstruction # of large images from patches in PREDICT mode predictions['corner'] = features['corners'] # Create an EstimatorSpec spec = tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) logger.debug('Created PREDICT EstimatorSpec') return spec # Calculate loss: per-voxel weighted cross-entropy with tf.name_scope('loss'): # Cross-entropy from logits. 
Note the transpose to convert # channels-first data into channels-last data if mode == tf.estimator.ModeKeys.TRAIN: # During training, use per-voxel cross-entropy weighting # plus regularization terms c_lr_2d = params['c_lr_2d'] # Get the output from 'predictor_2d' Gene predictor_2d_gene = list(self.gene_graph.genes.values())[1] logits_2d = tensor_map[predictor_2d_gene] losses = [] loss_weights = [1, c_lr_2d] for l, logit in enumerate([logits, logits_2d]): # Experiment-specific tweak: add a loss term from the 2d # predictor as well if logit.get_shape().ndims == 4: xentropy = \ tf.nn.sparse_softmax_cross_entropy_with_logits( labels=labels['label'], logits=tf.transpose(logit, [0, 2, 3, 1], name=f'transpose_{l}'), name=f'softmax_xentropy_{l}') else: xentropy = \ tf.nn.sparse_softmax_cross_entropy_with_logits( labels=labels['label'], logits=tf.transpose(logit, [0, 2, 3, 4, 1], name=f'transpose_{l}'), name=f'softmax_xentropy_{l}') # Impose a weight floor # Get weight floor weight_floor = features['weight_floor'][0] # Treat zeroed areas differently - they shouldn't be # included in loss calculations weight = labels['weight'] nonzero_weights = tf.cast(tf.not_equal(weight, 0), dtype=weight.dtype) weight += tf.multiply(nonzero_weights, weight_floor) weights = weight # Use the floored weights; zeroed voxels keep weight 0 and stay excluded from the loss. weighted_xentropy = tf.multiply( weights, xentropy, name=f'weighted_xentropy_{l}') # Sum voxel loss values losses.append(loss_weights[l] * tf.reduce_sum( weighted_xentropy, name=f'sum_xentropy_{l}')) loss = tf.math.add_n(losses, name='total_xentropy') # Add regularization terms, weighted to ignore zeroed-out # areas frac_nonzero = tf.reduce_mean(nonzero_weights) loss += frac_nonzero * tf.losses.get_regularization_loss() else: # Experiment-specific tweak: add a loss term from the 2d # predictor as well if logits.get_shape().ndims == 4: xentropy = \ tf.nn.sparse_softmax_cross_entropy_with_logits( labels=labels['label'], logits=tf.transpose(logits, [0, 2, 3, 1], name='transpose'), name='softmax_xentropy') else: xentropy = \ tf.nn.sparse_softmax_cross_entropy_with_logits( labels=labels['label'], logits=tf.transpose(logits, [0, 2, 3, 4, 1], name='transpose'), name='softmax_xentropy') # For eval, use per-voxel cross entropy summed across all # voxels loss = tf.reduce_sum(xentropy, name='sum_xentropy') # Build training op if mode == tf.estimator.ModeKeys.TRAIN: # Get training hyperparameters learning_rate = 10**params['log_learning_rate'] decay_steps = 10**params['log_decay_steps'] exponential_decay_rate = params['exponential_decay_rate'] beta1 = 1 - 10**params['log_alpha1'] beta2 = 1 - 10**params['log_alpha2'] epsilon = 10**params['log_epsilon'] with tf.name_scope('train'): lr = tf.train.exponential_decay( learning_rate=learning_rate, global_step=tf.train.get_global_step(), decay_steps=decay_steps, decay_rate=exponential_decay_rate, staircase=True) optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=beta1, beta2=beta2, epsilon=epsilon, name='adam') train_op = optimizer.minimize( loss=loss, global_step=tf.train.get_global_step()) # Create an EstimatorSpec spec = tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op) logger.debug('Created TRAIN EstimatorSpec') # Minimize the loss in TRAIN mode return spec # Build evaluation op with tf.name_scope('eval'): # Add evaluation metrics # noinspection PyUnresolvedReferences flat_labels = tf.layers.flatten(labels['label']) flat_labels = \ tf.reshape( tensor=flat_labels, shape=[batch_size*flat_labels.get_shape()[1]]) flat_predictions =
tf.layers.flatten(predictions['classes']) flat_predictions = tf.reshape( tensor=flat_predictions, shape=[batch_size * flat_predictions.get_shape()[1]]) eval_ops = { 'accuracy': tf.metrics.accuracy(labels=labels['label'], predictions=predictions['classes'], name='accuracy'), 'mean_iou': tf.metrics.mean_iou(labels=labels['label'], predictions=predictions['classes'], num_classes=n_classes), 'adj_rand_idx': _adj_rand_idx_metric_op(flat_labels, flat_predictions) } # Create an EstimatorSpec spec = tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_ops) logger.debug('Created EVAL EstimatorSpec') return spec
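# A minimal, self-contained sketch (an addition, not original GeneNet code)
# of the loss pattern above: channels-first logits are transposed to
# channels-last for tf.nn.sparse_softmax_cross_entropy_with_logits, and the
# weight floor (the toy value 0.1 here) is added only where the per-voxel
# weight is nonzero, so zeroed-out areas contribute nothing to the loss.
# All shapes and values are toy assumptions.
import tensorflow as tf

logits_cf = tf.random.normal([2, 3, 4, 4])                   # [batch, classes, H, W]
labels = tf.zeros([2, 4, 4], dtype=tf.int32)                 # [batch, H, W]
weight = tf.ones([2, 4, 4]) * tf.constant([1., 1., 0., 0.])  # zeros mark ignored pixels
xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=labels, logits=tf.transpose(logits_cf, [0, 2, 3, 1]))
nonzero = tf.cast(tf.not_equal(weight, 0), tf.float32)
loss = tf.reduce_sum((weight + 0.1 * nonzero) * xent)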
def call(
    self,
    inputs,
    attention_mask=None,
    langs=None,
    token_type_ids=None,
    position_ids=None,
    lengths=None,
    cache=None,
    head_mask=None,
    inputs_embeds=None,
    output_attentions=None,
    output_hidden_states=None,
    return_dict=None,
    training=False,
):  # removed: src_enc=None, src_len=None
    if isinstance(inputs, (tuple, list)):
        input_ids = inputs[0]
        attention_mask = inputs[1] if len(inputs) > 1 else attention_mask
        langs = inputs[2] if len(inputs) > 2 else langs
        token_type_ids = inputs[3] if len(inputs) > 3 else token_type_ids
        position_ids = inputs[4] if len(inputs) > 4 else position_ids
        lengths = inputs[5] if len(inputs) > 5 else lengths
        cache = inputs[6] if len(inputs) > 6 else cache
        head_mask = inputs[7] if len(inputs) > 7 else head_mask
        inputs_embeds = inputs[8] if len(inputs) > 8 else inputs_embeds
        output_attentions = inputs[9] if len(inputs) > 9 else output_attentions
        output_hidden_states = inputs[10] if len(inputs) > 10 else output_hidden_states
        return_dict = inputs[11] if len(inputs) > 11 else return_dict
        assert len(inputs) <= 12, "Too many inputs."
    elif isinstance(inputs, (dict, BatchEncoding)):
        input_ids = inputs.get("input_ids")
        attention_mask = inputs.get("attention_mask", attention_mask)
        langs = inputs.get("langs", langs)
        token_type_ids = inputs.get("token_type_ids", token_type_ids)
        position_ids = inputs.get("position_ids", position_ids)
        lengths = inputs.get("lengths", lengths)
        cache = inputs.get("cache", cache)
        head_mask = inputs.get("head_mask", head_mask)
        inputs_embeds = inputs.get("inputs_embeds", inputs_embeds)
        output_attentions = inputs.get("output_attentions", output_attentions)
        output_hidden_states = inputs.get("output_hidden_states", output_hidden_states)
        return_dict = inputs.get("return_dict", return_dict)
        assert len(inputs) <= 12, "Too many inputs."
    else:
        input_ids = inputs

    output_attentions = output_attentions if output_attentions is not None else self.output_attentions
    output_hidden_states = output_hidden_states if output_hidden_states is not None else self.output_hidden_states
    return_dict = return_dict if return_dict is not None else self.return_dict

    if input_ids is not None and inputs_embeds is not None:
        raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
    elif input_ids is not None:
        bs, slen = shape_list(input_ids)
    elif inputs_embeds is not None:
        bs, slen = shape_list(inputs_embeds)[:2]
    else:
        raise ValueError("You have to specify either input_ids or inputs_embeds")

    if lengths is None:
        if input_ids is not None:
            lengths = tf.reduce_sum(
                tf.cast(tf.not_equal(input_ids, self.pad_index), dtype=tf.int32), axis=1)
        else:
            lengths = tf.convert_to_tensor([slen] * bs, tf.int32)
    # mask = input_ids != self.pad_index

    # check inputs
    # assert shape_list(lengths)[0] == bs
    tf.debugging.assert_equal(
        shape_list(lengths)[0], bs,
        message=f"Expected batch size {shape_list(lengths)[0]} and received batch size {bs} mismatched",
    )
    # assert lengths.max().item() <= slen
    # input_ids = input_ids.transpose(0, 1)  # batch size as dimension 0
    # assert (src_enc is None) == (src_len is None)
    # if src_enc is not None:
    #     assert self.is_decoder
    #     assert src_enc.size(0) == bs

    # generate masks
    mask, attn_mask = get_masks(slen, lengths, self.causal, padding_mask=attention_mask)
    # if self.is_decoder and src_enc is not None:
    #     src_mask = torch.arange(src_len.max(), dtype=torch.long, device=lengths.device) < src_len[:, None]

    # position_ids
    if position_ids is None:
        position_ids = tf.expand_dims(tf.range(slen), axis=0)
    else:
        # assert shape_list(position_ids) == [bs, slen]  # (slen, bs)
        tf.debugging.assert_equal(
            shape_list(position_ids), [bs, slen],
            message=f"Position id shape {shape_list(position_ids)} and input shape {[bs, slen]} mismatched",
        )
        # position_ids = position_ids.transpose(0, 1)

    # langs
    if langs is not None:
        # assert shape_list(langs) == [bs, slen]  # (slen, bs)
        tf.debugging.assert_equal(
            shape_list(langs), [bs, slen],
            message=f"Lang shape {shape_list(langs)} and input shape {[bs, slen]} mismatched",
        )
        # langs = langs.transpose(0, 1)

    # Prepare head mask if needed
    # 1.0 in head_mask indicates we keep the head
    # attention_probs has shape bsz x n_heads x N x N
    # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
    # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x qlen x klen]
    if head_mask is not None:
        raise NotImplementedError
    else:
        head_mask = [None] * self.n_layers

    # do not recompute cached elements
    if cache is not None and input_ids is not None:
        _slen = slen - cache["slen"]
        input_ids = input_ids[:, -_slen:]
        position_ids = position_ids[:, -_slen:]
        if langs is not None:
            langs = langs[:, -_slen:]
        mask = mask[:, -_slen:]
        attn_mask = attn_mask[:, -_slen:]

    # embeddings
    if inputs_embeds is None:
        inputs_embeds = self.embeddings(input_ids)

    tensor = inputs_embeds + self.position_embeddings(position_ids)
    if langs is not None and self.use_lang_emb:
        tensor = tensor + self.lang_embeddings(langs)
    if token_type_ids is not None:
        tensor = tensor + self.embeddings(token_type_ids)
    tensor = self.layer_norm_emb(tensor)
    tensor = self.dropout(tensor, training=training)
    tensor = tensor * mask[..., tf.newaxis]

    # hidden_states and attentions cannot be None in graph mode.
    hidden_states = ()
    attentions = ()

    # transformer layers
    for i in range(self.n_layers):
        # LayerDrop (the tensor comparison below is only meaningfully
        # evaluated in eager mode)
        dropout_probability = tf.random.uniform([1], 0, 1)
        if training and tf.less(dropout_probability, self.layerdrop):
            continue

        if output_hidden_states:
            hidden_states = hidden_states + (tensor,)

        # self attention
        if not self.pre_norm:
            attn_outputs = self.attentions[i](
                tensor, attn_mask, None, cache, head_mask[i],
                output_attentions, training=training)
            attn = attn_outputs[0]
            if output_attentions:
                attentions = attentions + (attn_outputs[1],)
            attn = self.dropout(attn, training=training)
            tensor = tensor + attn
            tensor = self.layer_norm1[i](tensor)
        else:
            tensor_normalized = self.layer_norm1[i](tensor)
            attn_outputs = self.attentions[i](
                tensor_normalized, attn_mask, None, cache, head_mask[i],
                output_attentions, training=training)
            attn = attn_outputs[0]
            if output_attentions:
                attentions = attentions + (attn_outputs[1],)
            attn = self.dropout(attn, training=training)
            tensor = tensor + attn

        # encoder attention (for decoder only)
        # if self.is_decoder and src_enc is not None:
        #     attn = self.encoder_attn[i](tensor, src_mask, kv=src_enc, cache=cache)
        #     attn = F.dropout(attn, p=self.dropout, training=self.training)
        #     tensor = tensor + attn
        #     tensor = self.layer_norm15[i](tensor)

        # FFN
        if not self.pre_norm:
            tensor = tensor + self.ffns[i](tensor)
            tensor = self.layer_norm2[i](tensor)
        else:
            tensor_normalized = self.layer_norm2[i](tensor)
            tensor = tensor + self.ffns[i](tensor_normalized)
        tensor = tensor * mask[..., tf.newaxis]

    # Add last hidden state
    if output_hidden_states:
        hidden_states = hidden_states + (tensor,)

    # update cache length (was the PyTorch-ism `tensor.size(1)`, which TF
    # tensors do not support)
    if cache is not None:
        cache["slen"] += shape_list(tensor)[1]

    # move back sequence length to dimension 0
    # tensor = tensor.transpose(0, 1)

    # Set to None here if the output booleans are at False
    hidden_states = hidden_states if output_hidden_states else None
    attentions = attentions if output_attentions else None

    if not return_dict:
        return tuple(v for v in [tensor, hidden_states, attentions] if v is not None)

    return TFBaseModelOutput(
        last_hidden_state=tensor, hidden_states=hidden_states, attentions=attentions)
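# Self-contained sketch (an added illustration, not upstream code) of the
# padding bookkeeping used in `call` above: sequence lengths are recovered
# by counting non-pad tokens, and position ids are a broadcast tf.range.
# pad_index=0 and the toy ids are assumptions.
import tensorflow as tf

pad_index = 0
input_ids = tf.constant([[5, 6, 7, 0, 0],
                         [3, 4, 0, 0, 0]])
lengths = tf.reduce_sum(
    tf.cast(tf.not_equal(input_ids, pad_index), tf.int32), axis=1)  # [3, 2]
slen = tf.shape(input_ids)[1]
position_ids = tf.expand_dims(tf.range(slen), axis=0)  # [[0, 1, 2, 3, 4]]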
def build_model(self):
    features = self.features
    captions = self.captions
    batch_size = tf.shape(features)[0]

    captions_in = captions[:, :self.T]
    captions_out = captions[:, 1:]
    # Zero out loss contributions from null (padding) tokens
    mask = tf.to_float(tf.not_equal(captions_out, self._null))

    # batch normalize feature vectors
    features = self._batch_norm(features, mode='train', name='conv_features')

    h = self._get_initial_lstm(features=features)
    x = self._word_embedding(inputs=captions_in)
    features_proj = self._project_features(features=features)
    vg = self._get_vg(features=features)

    loss = 0.0
    alpha_list = []
    gru_cell = tf.nn.rnn_cell.GRUCell(num_units=self.H)

    for t in range(self.T):
        context, alpha = self._attention_layer(features, features_proj, h,
                                               reuse=(t != 0))
        alpha_list.append(alpha[:, :])

        if self.selector:
            context, beta = self._selector(context, h, reuse=(t != 0))

        # Note: the variable scope is named 'lstm' even though the cell is
        # a GRU, to stay compatible with existing checkpoints
        with tf.variable_scope('lstm', reuse=(t != 0)):
            _, h = gru_cell(inputs=tf.concat([x[:, t, :], vg], 1), state=h)

        logits = self._decode_lstm(x[:, t, :], h, context,
                                   dropout=self.dropout, reuse=(t != 0))
        loss += tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=captions_out[:, t], logits=logits) * mask[:, t])

    if self.alpha_c > 0:
        # Doubly stochastic attention regularization: encourage the
        # attention weights at each location to sum to T/L over time
        alphas = tf.transpose(tf.stack(alpha_list), (1, 0, 2))  # (N, T, L)
        alphas_all = tf.reduce_sum(alphas, 1)  # (N, L)
        alpha_reg = self.alpha_c * tf.reduce_sum(
            (self.T / self.L - alphas_all)**2)
        loss += alpha_reg

    return loss / tf.to_float(batch_size)
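# Minimal sketch (an added illustration with made-up ids) of the caption
# masking in build_model above, vectorized over time rather than looped:
# null-token positions get zero weight in the summed loss.
import tensorflow as tf

null_id = 0
captions_out = tf.constant([[4, 7, 0, 0]])        # [N, T], 0 = null padding
logits = tf.random.normal([1, 4, 10])             # [N, T, vocab]
mask = tf.cast(tf.not_equal(captions_out, null_id), tf.float32)
xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=captions_out, logits=logits)
loss = tf.reduce_sum(xent * mask) / tf.cast(tf.shape(captions_out)[0], tf.float32)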
def extract_trained_prototypes(embedding,
                               location_features,
                               cluster_labels,
                               num_clusters,
                               kmeans_iterations,
                               panoptic_labels,
                               panoptic_label_divisor,
                               ignore_label,
                               evaluate_semantic_or_panoptic):
    """Extracts the trained prototypes in a single image.

    Args:
        embedding: A 2-D float tensor with shape `[pixels, embedding_dim]`.
        location_features: A 2-D float tensor for location features with
            shape `[pixels, 2]`.
        cluster_labels: A 1-D integer tensor for cluster labels for all
            pixels.
        num_clusters: An integer scalar for total number of clusters.
        kmeans_iterations: Number of iterations for the k-means clustering.
        panoptic_labels: A 1-D integer tensor for panoptic labels for all
            pixels.
        panoptic_label_divisor: An integer constant to separate semantic and
            instance labels from panoptic labels.
        ignore_label: The semantic label to ignore.
        evaluate_semantic_or_panoptic: A string, either 'semantic' or
            'panoptic', that specifies which segmentation task to evaluate.

    Returns:
        prototype_features: A 2-D float tensor for prototype features with
            shape `[num_prototypes, embedding_dim]`.
        prototype_semantic_labels: A 1-D integer tensor for prototype
            semantic labels.
        prototype_instance_labels: A 1-D integer tensor for prototype
            instance labels.
    """
    # Collect pixels of valid semantic classes.
    valid_pixels = tf.where(
        tf.not_equal(panoptic_labels // panoptic_label_divisor, ignore_label))
    panoptic_labels = tf.squeeze(tf.gather(panoptic_labels, valid_pixels),
                                 axis=1)
    cluster_labels = tf.squeeze(tf.gather(cluster_labels, valid_pixels),
                                axis=1)
    embedding = tf.squeeze(tf.gather(embedding, valid_pixels), axis=1)
    location_features = tf.squeeze(
        tf.gather(location_features, valid_pixels), axis=1)

    # Generate cluster labels via kmeans clustering.
    embedding_with_location = tf.concat([embedding, location_features], 1)
    embedding_with_location = common_utils.normalize_embedding(
        embedding_with_location)
    cluster_labels = common_utils.kmeans_with_initial_labels(
        embedding_with_location, cluster_labels, num_clusters,
        kmeans_iterations)
    _, cluster_labels = tf.unique(cluster_labels)

    if evaluate_semantic_or_panoptic == 'panoptic':
        # Calculate semantic and unique instance labels for all pixels.
        label_mapping, unique_panoptic_labels = tf.unique(panoptic_labels)
        # Find pixels of majority classes.
        select_pixels, majority_labels = find_majority_label_index(
            unique_panoptic_labels, cluster_labels)
    else:
        # Find pixels of majority semantic classes.
        semantic_labels = panoptic_labels // panoptic_label_divisor
        select_pixels, majority_labels = find_majority_label_index(
            semantic_labels, cluster_labels)

    cluster_labels = tf.squeeze(tf.gather(cluster_labels, select_pixels),
                                axis=1)
    embedding = tf.squeeze(tf.gather(embedding, select_pixels), axis=1)

    # Calculate the majority semantic and instance label for each prototype.
    if evaluate_semantic_or_panoptic == 'panoptic':
        prototype_panoptic_labels = tf.gather(label_mapping, majority_labels)
        prototype_semantic_labels = (prototype_panoptic_labels //
                                     panoptic_label_divisor)
        prototype_instance_labels = majority_labels
    else:
        prototype_semantic_labels = majority_labels
        prototype_instance_labels = tf.zeros_like(majority_labels)

    # Calculate the prototype features.
    prototype_features = common_utils.calculate_prototypes_from_labels(
        embedding, cluster_labels)

    return (prototype_features, prototype_semantic_labels,
            prototype_instance_labels)
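# Sketch (not from the source) of the valid-pixel filtering idiom above:
# tf.where yields indices of shape [num_valid, 1], tf.gather keeps that
# extra axis, and tf.squeeze(..., axis=1) flattens it back out. Here the
# labels are already 1-D semantic labels, a simplification of the panoptic
# case; ignore_label=255 is a toy assumption.
import tensorflow as tf

ignore_label = 255
labels = tf.constant([1, 255, 2, 255, 3])
embedding = tf.random.normal([5, 8])
valid = tf.where(tf.not_equal(labels, ignore_label))          # shape [3, 1]
labels = tf.squeeze(tf.gather(labels, valid), axis=1)         # [1, 2, 3]
embedding = tf.squeeze(tf.gather(embedding, valid), axis=1)   # shape [3, 8]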
def build(self, features):
    src_ids = features['src_ids']
    trg_ids = None
    self.batch_size = tf.shape(src_ids)[0]
    if self.is_training:
        trg_ids = features['trg_ids']

    with tf.variable_scope('src_encoder'):
        self.shared_tensors['src_ids'] = src_ids
        src_mask = tf.cast(tf.equal(src_ids, self.data.vocab.pad_id),
                           tf.float32)
        src_bias = common_attention.attention_bias_ignore_padding(src_mask)
        self.shared_tensors['src_bias'] = src_bias
        self.shared_tensors['src_mask'] = src_mask

        src_embs = self._embedding_fn(src_ids)
        src_embs = common_attention.add_timing_signal_1d(src_embs)

        if 'syntax_gen' in self.flags.control_mode:
            template_comp_ids = features['template_comp_ids']
            template_embs = self._embedding_fn(
                template_comp_ids,
                self.shared_tensors['syntax_embedding_table'])
            template_scale = tf.get_variable(
                'template_scale', shape=[1, self.flags.syntax_level, 1, 1],
                trainable=True, dtype=tf.float32)
            template_embs *= template_scale
            template_embs = tf.reduce_mean(template_embs, axis=1)
            src_embs += template_embs

        if 'gpt2' in self.flags.model_mode:
            src_outputs = model.gpt2_encoder(self.hparams, src_embs,
                                             encoder_bias=src_bias)
        elif 't2t' in self.flags.model_mode:
            src_outputs = transformer.transformer_encoder(
                src_embs, src_bias, self.hparams)
        elif 'bert' in self.flags.model_mode:
            bert_model = BertModel(
                config=BertConfig.from_json_file(self.flags.bert_config_file),
                is_training=self.is_training,
                input_ids=src_ids,
                input_mask=1.0 - src_mask,
                embeddings=self.shared_tensors['word_embedding_table'])
            src_outputs = bert_model.get_sequence_output()
        else:
            raise ValueError('model_mode not known.')
        self.shared_tensors['src_outputs'] = src_outputs

        if self.flags.control_mode:
            control_ids = features['control_ids']
            control_mask = tf.cast(
                tf.equal(control_ids, self.data.vocab.pad_id), tf.float32)
            control_bias = common_attention.attention_bias_ignore_padding(
                control_mask)
            control_embs = self._embedding_fn(control_ids)

            if 'gpt2' in self.flags.model_mode:
                control_outputs = model.gpt2_encoder(
                    self.hparams, control_embs, encoder_bias=control_bias)
            elif ('t2t' in self.flags.model_mode
                  or 'bert' in self.flags.model_mode):
                control_outputs = transformer.transformer_encoder(
                    control_embs, control_bias, self.hparams,
                    name='control_encoder')
            else:
                raise ValueError('model_mode not known.')

            self.shared_tensors['control_vec'] = features['control_vec']
            self.shared_tensors['control_outputs'] = control_outputs
            self.shared_tensors['control_bias'] = control_bias
            self.shared_tensors['extra_vec'] = features['extra_vec']

        # if 'syntax_gen' in self.flags.control_mode:
        #     template_comp_ids = features['template_comp_ids']
        #     template_comp_outputs, template_comp_bias = \
        #         self.encode_syntax_template(template_comp_ids)
        #     self.shared_tensors['template_comp_outputs'] = template_comp_outputs
        #     self.shared_tensors['template_comp_bias'] = template_comp_bias

    batch_go = tf.tile(
        tf.expand_dims(self._embedding_fn(self.data.vocab.go_id), axis=0),
        [self.batch_size, 1])
    batch_go_id = tf.tile(
        tf.constant(self.data.vocab.go_id, tf.int32, shape=[1]),
        [self.batch_size])
    self.shared_tensors['batch_go'] = batch_go
    self.shared_tensors['batch_go_id'] = batch_go_id

    batch_syntax_go = tf.tile(
        tf.expand_dims(self._embedding_fn(self.data.syntax_vocab.go_id),
                       axis=0),
        [self.batch_size, 1])
    batch_syntax_go_id = tf.tile(
        tf.constant(self.data.syntax_vocab.go_id, tf.int32, shape=[1]),
        [self.batch_size])
    self.shared_tensors['batch_syntax_go'] = batch_syntax_go
    self.shared_tensors['batch_syntax_go_id'] = batch_syntax_go_id

    outputs = {}
    outputs['src_ids'] = src_ids
    if self.flags.control_mode:
        outputs["control_vec"] = self.shared_tensors['control_vec']
        # if 'predict' in self.flags.control_mode:
        #     control_vec, outputs = self.classify(
        #         outputs, self.shared_tensors['control_vec'],
        #         "fix_predict" in self.flags.control_mode)
        #     self.shared_tensors['control_vec'] = control_vec

    if self.flags.control_mode:
        if "flatten" not in self.flags.control_mode:
            duplicate_copies = (self.flags.dimension //
                                self.data.control_vec_len)
            batch_size = (self.flags.train_batch_size if self.is_training
                          else self.flags.eval_batch_size)
            control_vec = tf.concat([
                tf.reshape(
                    tf.transpose(
                        tf.tile(
                            tf.expand_dims(
                                self.shared_tensors['control_vec'][o, :],
                                axis=0),
                            [duplicate_copies, 1])),
                    [1, self.flags.dimension])
                for o in range(batch_size)
            ], axis=0)
            more_control_vec = tf.zeros(
                [batch_size,
                 self.flags.dimension % self.data.control_vec_len])
            if not self.is_training and self.flags.beam_search_size > 1:
                more_control_vec = tf.zeros(
                    [batch_size * self.flags.beam_search_size,
                     self.flags.dimension % self.data.control_vec_len])
            self.shared_tensors['control_vec'] = tf.concat(
                [control_vec, more_control_vec], axis=1)
        else:
            score = tf.expand_dims(self.shared_tensors['control_vec'],
                                   axis=-1)
            score = tf.tile(score, [1, 1, self.flags.dimension])
            self.shared_tensors['control_vec'] = score

        if "encoder" in self.flags.control_mode:
            src_outputs = self.update_embedding(src_outputs, False)
            self.shared_tensors['src_outputs'] = src_outputs

    with tf.variable_scope("trg_decoder"):
        if self.is_training:
            # Generate syntax
            if 'syntax_gen' in self.flags.control_mode:
                syntax_losses = []
                template_simp_ids = features['template_simp_ids']
                template_simp_ids_layers = tf.unstack(template_simp_ids,
                                                      axis=1)
                for l_id in range(self.flags.syntax_level):
                    template_simp_ids_layer = template_simp_ids_layers[l_id]
                    template_simp_ids_layer_list = tf.unstack(
                        template_simp_ids_layer, axis=1)
                    template_simp_ids_layer_inp_list = (
                        [batch_syntax_go_id]
                        + template_simp_ids_layer_list[:-1])
                    template_simp_emb_list = self._embedding_fn(
                        template_simp_ids_layer_inp_list,
                        self.shared_tensors['syntax_embedding_table'])
                    template_simp_emb = tf.stack(template_simp_emb_list,
                                                 axis=1)

                    template_mask = tf.cast(
                        tf.equal(template_simp_ids_layers[0],
                                 self.data.vocab.pad_id),
                        tf.float32)
                    template_bias = \
                        common_attention.attention_bias_ignore_padding(
                            template_mask)

                    if l_id == 0:
                        self.shared_tensors['template_prev_simp_outputs'] = None
                        self.shared_tensors['template_simp_bias'] = None
                    else:
                        template_simp_prev_ids_layers = \
                            template_simp_ids_layers[:l_id]
                        template_simp_prev_ids = tf.stack(
                            template_simp_prev_ids_layers, axis=1)
                        template_simp_prev_embs = self._embedding_fn(
                            template_simp_prev_ids,
                            self.shared_tensors['syntax_embedding_table'])
                        cur_template_scale = template_scale[:, :l_id, :, :]
                        template_simp_prev_embs *= cur_template_scale
                        template_simp_prev_embs = tf.reduce_mean(
                            template_simp_prev_embs, axis=1)
                        template_simp_outputs, template_simp_bias = \
                            self.encode_syntax_template(
                                template_simp_prev_embs, template_bias)
                        self.shared_tensors['template_prev_simp_outputs'] = \
                            template_simp_outputs
                        self.shared_tensors['template_simp_bias'] = \
                            template_simp_bias

                    syntax_outputs = self.decode_syntax_template(
                        template_simp_emb)

                    syntax_logits = tf.nn.conv1d(
                        syntax_outputs,
                        tf.expand_dims(
                            self.shared_tensors['proj_syntax_w'], axis=0),
                        1, 'SAME') + tf.expand_dims(
                            tf.expand_dims(
                                self.shared_tensors['proj_syntax_b'],
                                axis=0),
                            axis=0)
                    # syntax_gen = tf.argmax(syntax_logits, axis=-1)
                    syntax_weight = tf.cast(
                        tf.not_equal(template_simp_ids_layer,
                                     self.data.syntax_vocab.pad_id),
                        tf.float32)
                    syntax_loss = sequence_loss(
                        logits=syntax_logits,
                        targets=template_simp_ids_layer,
                        weights=syntax_weight)
                    syntax_losses.append(syntax_loss)

                outputs['loss_syntax'] = tf.add_n(syntax_losses)
                outputs['perplexity_syntax'] = tf.exp(outputs['loss_syntax'])
                tf.summary.scalar("loss_syntax", outputs['loss_syntax'])
                tf.summary.scalar("perplexity_syntax",
                                  outputs['perplexity_syntax'])

                template_simp_prev_ids_layers = template_simp_ids_layers
                template_simp_prev_ids = tf.stack(
                    template_simp_prev_ids_layers, axis=1)
                template_simp_prev_embs = self._embedding_fn(
                    template_simp_prev_ids,
                    self.shared_tensors['syntax_embedding_table'])
                cur_template_scale = template_scale
                template_simp_prev_embs *= cur_template_scale
                template_simp_prev_embs = tf.reduce_mean(
                    template_simp_prev_embs, axis=1)
                template_simp_outputs, template_simp_bias = \
                    self.encode_syntax_template(
                        template_simp_prev_embs, template_bias)
                self.shared_tensors['template_simp_outputs'] = \
                    template_simp_outputs
                self.shared_tensors['template_simp_bias'] = \
                    template_simp_bias

            # Generate sentence
            trg_ids_list = tf.unstack(trg_ids, axis=1)
            trg_input_ids_list = [batch_go_id] + trg_ids_list[:-1]
            trg_emb_list = self._embedding_fn(trg_input_ids_list)
            trg_input_ids = tf.stack(trg_input_ids_list, axis=1)
            trg_output_ids = tf.stack(trg_ids_list, axis=1)
            trg_emb = tf.stack(trg_emb_list, axis=1)

            decoder_outputs = self.decode_srcs_to_trgs(
                trg_emb=trg_emb, trg_input_ids=trg_input_ids,
                outputs=outputs)
            word_logits = tf.nn.conv1d(
                decoder_outputs,
                tf.expand_dims(self.shared_tensors['proj_word_w'], axis=0),
                1, 'SAME') + tf.expand_dims(
                    tf.expand_dims(self.shared_tensors['proj_word_b'],
                                   axis=0),
                    axis=0)
            word_gen = tf.argmax(word_logits, axis=-1)
            outputs['gen'] = word_gen
            outputs['logits'] = word_logits

            weight = tf.cast(
                tf.not_equal(trg_output_ids, self.data.vocab.pad_id),
                tf.float32)
            loss = sequence_loss(logits=word_logits,
                                 targets=trg_output_ids,
                                 weights=weight)
            outputs['loss_decoder'] = loss
            outputs['perplexity_decoder'] = tf.exp(loss)
            tf.summary.scalar("loss_decoder", outputs['loss_decoder'])
            tf.summary.scalar("perplexity_decoder",
                              outputs['perplexity_decoder'])

            # if 'predict' in self.flags.control_mode:
            #     outputs["loss_pred"] = (outputs['loss_length']
            #                             + outputs['loss_syntax']
            #                             + outputs['loss_split'])
            #     tf.summary.scalar("loss_length", outputs['loss_length'])
            #     tf.summary.scalar("loss_syntax", outputs['loss_syntax'])
            #     tf.summary.scalar("loss_split", outputs['loss_split'])
        else:
            outputs['gen_src_syntax_ids'] = features['template_comp_ids']
            confident_scores = []
            self._tile_variables()

            if 'syntax_gen' in self.flags.control_mode:

                def symbol_to_syntax_logits_fn(gen_ids):
                    cur_ids = tf.concat(
                        [tf.expand_dims(batch_syntax_go_id, axis=-1),
                         gen_ids[:, 1:]],
                        axis=1)
                    cur_embs = tf.nn.embedding_lookup(
                        self.shared_tensors['syntax_embedding_table'],
                        cur_ids)
                    cur_outputs = self.decode_syntax_template(cur_embs)
                    cur_logit = tf.matmul(
                        cur_outputs[:, -1, :],
                        self.shared_tensors['proj_syntax_w']
                    ) + self.shared_tensors['proj_syntax_b']
                    return cur_logit

                template_simp_prev_ids_layers = []
                for l_id in range(self.flags.syntax_level):
                    if l_id == 0:
                        self.shared_tensors['template_prev_simp_outputs'] = None
                        self.shared_tensors['template_simp_bias'] = None
                    else:
                        template_simp_prev_ids = tf.stack(
                            template_simp_prev_ids_layers, axis=1)
                        template_simp_prev_embs = self._embedding_fn(
                            template_simp_prev_ids,
                            self.shared_tensors['syntax_embedding_table'])
                        cur_template_scale = template_scale[:, :l_id, :, :]
                        template_simp_prev_embs *= cur_template_scale
                        template_simp_prev_embs = tf.reduce_mean(
                            template_simp_prev_embs, axis=1)
                        template_mask = tf.cast(
                            tf.equal(template_simp_prev_ids_layers[-1],
                                     self.data.vocab.pad_id),
                            tf.float32)
                        template_bias = \
                            common_attention.attention_bias_ignore_padding(
                                template_mask)
                        template_simp_outputs, template_simp_bias = \
                            self.encode_syntax_template(
                                template_simp_prev_embs, template_bias)
                        self.shared_tensors['template_prev_simp_outputs'] = \
                            template_simp_outputs
                        self.shared_tensors['template_simp_bias'] = \
                            template_simp_bias

                    beam_ids, beam_score = beam_search.beam_search(
                        symbols_to_logits_fn=symbol_to_syntax_logits_fn,
                        initial_ids=tf.ones([self.flags.eval_batch_size],
                                            tf.int32)
                        * self.data.syntax_vocab.go_id,
                        beam_size=self.flags.beam_search_size,
                        decode_length=self.flags.max_syntax_trg_len,
                        vocab_size=self.data.syntax_vocab.size(),
                        alpha=0.6,
                        eos_id=self.data.syntax_vocab.eos_id)
                    top_beam_ids = beam_ids[:, 0, 1:]
                    top_beam_ids = tf.pad(
                        top_beam_ids,
                        [[0, 0],
                         [0, self.flags.max_syntax_trg_len
                          - tf.shape(top_beam_ids)[1]]])
                    confident_score = -beam_score[:, 0] / tf.to_float(
                        tf.shape(top_beam_ids)[1])
                    confident_scores.append(confident_score)
                    # outputs['gen_src_syntax_ids'] = features['template_comp_ids']
                    # outputs['gen_trg_syntax_ids'] = top_beam_ids
                    # outputs['gen_trg_syntax_scores'] = confident_score
                    template_simp_prev_ids_layers.append(top_beam_ids)

                template_simp_prev_ids = tf.stack(
                    template_simp_prev_ids_layers, axis=1)
                outputs['gen_trg_syntax_ids'] = template_simp_prev_ids
                outputs['gen_trg_syntax_scores'] = tf.add_n(confident_scores)

                template_simp_prev_embs = self._embedding_fn(
                    template_simp_prev_ids,
                    self.shared_tensors['syntax_embedding_table'])
                template_simp_prev_embs *= template_scale
                template_simp_prev_embs = tf.reduce_mean(
                    template_simp_prev_embs, axis=1)
                template_mask = tf.cast(
                    tf.equal(template_simp_prev_ids_layers[-1],
                             self.data.vocab.pad_id),
                    tf.float32)
                template_bias = \
                    common_attention.attention_bias_ignore_padding(
                        template_mask)
                template_simp_outputs, template_simp_bias = \
                    self.encode_syntax_template(
                        template_simp_prev_embs, template_bias)
                self.shared_tensors['template_simp_outputs'] = \
                    template_simp_outputs
                self.shared_tensors['template_simp_bias'] = \
                    template_simp_bias

            def symbol_to_logits_fn(gen_ids):
                cur_ids = tf.concat(
                    [tf.expand_dims(batch_go_id, axis=-1), gen_ids[:, 1:]],
                    axis=1)
                cur_embs = tf.nn.embedding_lookup(
                    self.shared_tensors['word_embedding_table'], cur_ids)
                cur_outputs = self.decode_srcs_to_trgs(
                    trg_emb=cur_embs, trg_input_ids=cur_ids)
                cur_logit = tf.matmul(
                    cur_outputs[:, -1, :],
                    self.shared_tensors['proj_word_w']
                ) + self.shared_tensors['proj_word_b']
                return cur_logit

            beam_ids, beam_score = beam_search.beam_search(
                symbols_to_logits_fn=symbol_to_logits_fn,
                initial_ids=tf.ones([self.flags.eval_batch_size], tf.int32)
                * self.data.vocab.go_id,
                beam_size=self.flags.beam_search_size,
                decode_length=self.flags.max_trg_len,
                vocab_size=self.data.vocab.size()
                + len(self.data.vocab.more_tokens),
                alpha=0.6,
                eos_id=self.data.vocab.eos_id)
            top_beam_ids = beam_ids[:, 0, 1:]
            top_beam_ids = tf.pad(
                top_beam_ids,
                [[0, 0],
                 [0, self.flags.max_trg_len - tf.shape(top_beam_ids)[1]]])
            confident_score = -beam_score[:, 0] / tf.to_float(
                tf.shape(top_beam_ids)[1])
            outputs['gen_trg_ids'] = top_beam_ids
            outputs['gen_trg_scores'] = confident_score

    if self.flags.control_mode:
        outputs['control_ids'] = features['control_ids']
    return outputs
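# Toy sketch (an added illustration, not repo code) of the post-beam-search
# padding above: the top hypothesis is right-padded with zeros to a fixed
# decode length so downstream shapes stay static. max_trg_len=6 and the ids
# are assumptions.
import tensorflow as tf

max_trg_len = 6
top_beam_ids = tf.constant([[7, 8, 2]])                  # [batch, gen_len]
padded = tf.pad(top_beam_ids,
                [[0, 0], [0, max_trg_len - tf.shape(top_beam_ids)[1]]])
# padded -> [[7, 8, 2, 0, 0, 0]]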
def process_dataset(self, *row_parts):
    row_parts = list(row_parts)
    if self.use_multilanguage:
        language_id = row_parts[0]
        row_parts = row_parts[1:]
    else:
        language_id = None
    word = row_parts[0]  # target name string

    if not self.is_evaluating and self.config.RANDOM_CONTEXTS:
        all_contexts = tf.stack(row_parts[1:])
        all_contexts_padded = tf.concat([all_contexts, [self.context_pad]],
                                        axis=-1)
        index_of_blank_context = tf.where(
            tf.equal(all_contexts_padded, self.context_pad))
        num_contexts_per_example = tf.reduce_min(index_of_blank_context)
        # if there are fewer than self.config.MAX_CONTEXTS valid contexts,
        # still sample self.config.MAX_CONTEXTS
        safe_limit = tf.cast(
            tf.maximum(num_contexts_per_example, self.config.MAX_CONTEXTS),
            tf.int32)
        rand_indices = tf.random_shuffle(
            tf.range(safe_limit))[:self.config.MAX_CONTEXTS]
        contexts = tf.gather(all_contexts, rand_indices)  # (max_contexts,)
    else:
        contexts = row_parts[1:(self.config.MAX_CONTEXTS + 1)]  # (max_contexts,)

    # contexts: (max_contexts, )
    split_contexts = tf.string_split(contexts, delimiter=',',
                                     skip_empty=False)
    sparse_split_contexts = tf.sparse.SparseTensor(
        indices=split_contexts.indices,
        values=split_contexts.values,
        dense_shape=[self.config.MAX_CONTEXTS, 3])
    dense_split_contexts = tf.reshape(
        tf.sparse.to_dense(sp_input=sparse_split_contexts,
                           default_value=Common.PAD),
        shape=[self.config.MAX_CONTEXTS, 3])  # (max_contexts, 3)

    split_target_labels = tf.string_split(tf.expand_dims(word, -1),
                                          delimiter='|')
    target_dense_shape = [
        1,
        tf.maximum(tf.to_int64(self.config.MAX_TARGET_PARTS),
                   split_target_labels.dense_shape[1] + 1)
    ]
    sparse_target_labels = tf.sparse.SparseTensor(
        indices=split_target_labels.indices,
        values=split_target_labels.values,
        dense_shape=target_dense_shape)
    dense_target_label = tf.reshape(
        tf.sparse.to_dense(sp_input=sparse_target_labels,
                           default_value=Common.PAD), [-1])
    index_of_blank = tf.where(tf.equal(dense_target_label, Common.PAD))
    target_length = tf.reduce_min(index_of_blank)
    dense_target_label = dense_target_label[:self.config.MAX_TARGET_PARTS]
    clipped_target_lengths = tf.clip_by_value(
        target_length, clip_value_min=0,
        clip_value_max=self.config.MAX_TARGET_PARTS)
    target_word_labels = tf.concat(
        [self.target_table.lookup(dense_target_label), [0]],
        axis=-1)  # (max_target_parts + 1) of int

    path_source_strings = tf.slice(
        dense_split_contexts, [0, 0],
        [self.config.MAX_CONTEXTS, 1])  # (max_contexts, 1)
    flat_source_strings = tf.reshape(path_source_strings,
                                     [-1])  # (max_contexts)
    split_source = tf.string_split(
        flat_source_strings, delimiter='|',
        skip_empty=False)  # (max_contexts, max_name_parts)
    sparse_split_source = tf.sparse.SparseTensor(
        indices=split_source.indices,
        values=split_source.values,
        dense_shape=[self.config.MAX_CONTEXTS,
                     tf.maximum(tf.to_int64(self.config.MAX_NAME_PARTS),
                                split_source.dense_shape[1])])
    dense_split_source = tf.sparse.to_dense(
        sp_input=sparse_split_source,
        default_value=Common.PAD)  # (max_contexts, max_name_parts)
    dense_split_source = tf.slice(dense_split_source, [0, 0],
                                  [-1, self.config.MAX_NAME_PARTS])
    path_source_indices = self.subtoken_table.lookup(
        dense_split_source)  # (max_contexts, max_name_parts)
    path_source_lengths = tf.reduce_sum(
        tf.cast(tf.not_equal(dense_split_source, Common.PAD), tf.int32),
        -1)  # (max_contexts)

    path_strings = tf.slice(dense_split_contexts, [0, 1],
                            [self.config.MAX_CONTEXTS, 1])
    flat_path_strings = tf.reshape(path_strings, [-1])
    split_path = tf.string_split(flat_path_strings, delimiter='|',
                                 skip_empty=False)
    sparse_split_path = tf.sparse.SparseTensor(
        indices=split_path.indices,
        values=split_path.values,
        dense_shape=[self.config.MAX_CONTEXTS,
                     self.config.MAX_PATH_LENGTH])
    dense_split_path = tf.sparse.to_dense(
        sp_input=sparse_split_path,
        default_value=Common.PAD)  # (max_contexts, max_path_length)
    node_indices = self.node_table.lookup(
        dense_split_path)  # (max_contexts, max_path_length)
    path_lengths = tf.reduce_sum(
        tf.cast(tf.not_equal(dense_split_path, Common.PAD), tf.int32),
        -1)  # (max_contexts)

    path_target_strings = tf.slice(
        dense_split_contexts, [0, 2],
        [self.config.MAX_CONTEXTS, 1])  # (max_contexts, 1)
    flat_target_strings = tf.reshape(path_target_strings,
                                     [-1])  # (max_contexts)
    split_target = tf.string_split(
        flat_target_strings, delimiter='|',
        skip_empty=False)  # (max_contexts, max_name_parts)
    sparse_split_target = tf.sparse.SparseTensor(
        indices=split_target.indices,
        values=split_target.values,
        dense_shape=[self.config.MAX_CONTEXTS,
                     tf.maximum(tf.to_int64(self.config.MAX_NAME_PARTS),
                                split_target.dense_shape[1])])
    dense_split_target = tf.sparse.to_dense(
        sp_input=sparse_split_target,
        default_value=Common.PAD)  # (max_contexts, max_name_parts)
    dense_split_target = tf.slice(dense_split_target, [0, 0],
                                  [-1, self.config.MAX_NAME_PARTS])
    path_target_indices = self.subtoken_table.lookup(
        dense_split_target)  # (max_contexts, max_name_parts)
    path_target_lengths = tf.reduce_sum(
        tf.cast(tf.not_equal(dense_split_target, Common.PAD), tf.int32),
        -1)  # (max_contexts)

    # A context is valid if at least one of its source, path, or target
    # lookups produced a nonzero (in-vocabulary) index
    valid_contexts_mask = tf.to_float(
        tf.not_equal(
            tf.reduce_max(path_source_indices, -1)
            + tf.reduce_max(node_indices, -1)
            + tf.reduce_max(path_target_indices, -1), 0))

    return {
        TARGET_STRING_KEY: word,
        TARGET_INDEX_KEY: target_word_labels,
        TARGET_LENGTH_KEY: clipped_target_lengths,
        PATH_SOURCE_INDICES_KEY: path_source_indices,
        NODE_INDICES_KEY: node_indices,
        PATH_TARGET_INDICES_KEY: path_target_indices,
        VALID_CONTEXT_MASK_KEY: valid_contexts_mask,
        PATH_SOURCE_LENGTHS_KEY: path_source_lengths,
        PATH_LENGTHS_KEY: path_lengths,
        PATH_TARGET_LENGTHS_KEY: path_target_lengths,
        PATH_SOURCE_STRINGS_KEY: path_source_strings,
        PATH_STRINGS_KEY: path_strings,
        PATH_TARGET_STRINGS_KEY: path_target_strings,
        LANGUAGE_ID: language_id,
    }
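# Condensed sketch (assuming TF 1.x, where tf.string_split exists) of the
# subtoken handling in process_dataset above: split on '|', densify with a
# PAD default, then count non-PAD entries to get per-context lengths. The
# PAD constant and the toy names are assumptions.
import tensorflow as tf

PAD = '<PAD>'
names = tf.constant(['get|name', 'set'])
split = tf.string_split(names, delimiter='|', skip_empty=False)
dense = tf.sparse.to_dense(split, default_value=PAD)  # [['get','name'],['set',PAD]]
lengths = tf.reduce_sum(
    tf.cast(tf.not_equal(dense, PAD), tf.int32), -1)   # [2, 1]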
def dense2sparse(tensor):
    """Converts a dense tensor into a tf.SparseTensor of its nonzero entries."""
    tensor_idx = tf.where(tf.not_equal(tensor, 0))
    # dense_shape must be int64, so request int64 directly from tf.shape
    tensor_sparse = tf.SparseTensor(tensor_idx,
                                    tf.gather_nd(tensor, tensor_idx),
                                    tf.shape(tensor, out_type=tf.int64))
    return tensor_sparse
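# Usage sketch for dense2sparse (a round trip added for illustration):
import tensorflow as tf

dense = tf.constant([[1, 0, 2],
                     [0, 0, 3]])
sparse = dense2sparse(dense)
# sparse.indices -> [[0, 0], [0, 2], [1, 2]], sparse.values -> [1, 2, 3]
restored = tf.sparse.to_dense(sparse)  # equals `dense`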