def get_bilinear_classifier(self, layer, token_weights, variable_scope=None, reuse=False):
    """Build the bilinear attention classifier plus its loss and accuracy tensors.

    The input goes through ``self.n_layers - 1`` hidden layers and a final split
    projection, is scored with (optionally diagonal) bilinear attention, and is
    optionally rescored with linearization and distance terms. Shape notes in
    the comments use n for the batch size and m for the bucket size, both read
    from ``tf.shape(layer1)``.

    Args:
        layer: input tensor for the hidden layers; also returned untouched as
            ``outputs['recur_layer']``.
        token_weights: weights applied to the loss and to the accuracy counts.
        variable_scope: optional variable scope name; ``self.field`` is used
            when it is falsy.
        reuse: when truthy, the hidden keep probability is 1 instead of
            ``self.hidden_keep_prob``.

    Returns:
        dict holding the recurrent input, targets, probabilities, loss,
        predictions and the correct-token / correct-sequence counts.
    """
    recur_layer = layer
    keep_prob = 1 if reuse else self.hidden_keep_prob
    hidden_func = self.hidden_func
    hidden_size = self.hidden_size
    add_linear = self.add_linear
    linearize = self.linearize
    distance = self.distance
    # Two hidden projections per enabled scorer (attention, linearization, distance).
    n_splits = 2*(1+linearize+distance)

    with tf.variable_scope(variable_scope or self.field):
        for depth in six.moves.range(0, self.n_layers-1):
            with tf.variable_scope('FC-%d' % depth):
                layer = classifiers.hidden(
                    layer, n_splits*hidden_size,
                    hidden_func=hidden_func,
                    hidden_keep_prob=keep_prob)
        with tf.variable_scope('FC-top'):
            projections = classifiers.hiddens(
                layer, n_splits*[hidden_size],
                hidden_func=hidden_func,
                hidden_keep_prob=keep_prob)
        layer1, layer2 = projections.pop(0), projections.pop(0)
        if linearize:
            lin_layer1, lin_layer2 = projections.pop(0), projections.pop(0)
        if distance:
            dist_layer1, dist_layer2 = projections.pop(0), projections.pop(0)

        with tf.variable_scope('Attention'):
            # Both variants take the same arguments, so pick them once.
            if self.diagonal:
                attend = classifiers.diagonal_bilinear_attention
                discriminate = classifiers.diagonal_bilinear_discriminator
            else:
                attend = classifiers.bilinear_attention
                discriminate = classifiers.bilinear_discriminator
            scorer_kwargs = dict(hidden_keep_prob=keep_prob, add_linear=add_linear)

            logits, _ = attend(layer1, layer2, **scorer_kwargs)
            if linearize:
                with tf.variable_scope('Linearization'):
                    lin_logits = discriminate(lin_layer1, lin_layer2, **scorer_kwargs)
            if distance:
                with tf.variable_scope('Distance'):
                    dist_raw = discriminate(dist_layer1, dist_layer2, **scorer_kwargs)
                    # softplus is non-negative, so the rate is always at least 1
                    dist_lamda = 1 + tf.nn.softplus(dist_raw)

        #-----------------------------------------------------------
        # Process the targets
        targets = self.placeholder
        layer_shape = tf.shape(layer1)
        batch_size, bucket_size = layer_shape[0], layer_shape[1]
        # (1 x m)
        ids = tf.expand_dims(tf.range(bucket_size), 0)
        # (1 x m) -> (1 x 1 x m)
        head_ids = tf.expand_dims(ids, -2)
        # (1 x m) -> (1 x m x 1)
        dep_ids = tf.expand_dims(ids, -1)

        if linearize:
            # (n x m), (1 x m) -> (n x m); not consumed below, kept so the
            # constructed graph is unchanged
            lin_targets = tf.to_float(tf.less(targets, ids))
            # (1 x 1 x m), (1 x m x 1) -> (n x m x m): head index < dependent index
            head_is_left = tf.tile(tf.less(head_ids, dep_ids), [batch_size, 1, 1])
            # flip the sign of the logit wherever the head index is smaller
            signed_logits = tf.where(head_is_left, -lin_logits, lin_logits)
            lin_xent = -tf.nn.softplus(signed_logits)
            # rescoring only: no gradient reaches the linearizer through the logits
            logits = logits + tf.stop_gradient(lin_xent)
        if distance:
            # (n x m) - (1 x m) -> (n x m); not consumed below, kept so the
            # constructed graph is unchanged
            dist_targets = tf.abs(targets - ids)
            # (1 x 1 x m) - (1 x m x 1) -> (n x m x m)
            pair_distance = tf.tile(tf.abs(head_ids - dep_ids), [batch_size, 1, 1])
            dist_ids = tf.to_float(pair_distance)+1e-12
            squared_gap = (dist_ids - dist_lamda)**2
            dist_kld = -tf.log(squared_gap/2 + 1)
            # rescoring only: no gradient reaches the distance scorer through the logits
            logits = logits + tf.stop_gradient(dist_kld)

        #-----------------------------------------------------------
        # Compute probabilities/cross entropy
        # (n x m) + (m) -> (n x m): index 0 always gets an extra 1
        float_weights = tf.to_float(token_weights)
        is_first_index = tf.logical_not(tf.cast(tf.range(bucket_size), dtype=tf.bool))
        non_pads = float_weights + tf.to_float(is_first_index)
        # (n x m x m) o (n x 1 x m) -> (n x m x m)
        probabilities = tf.nn.softmax(logits) * tf.expand_dims(non_pads, -2)
        # (n x m), (n x m x m), (n x m) -> ()
        loss = tf.losses.sparse_softmax_cross_entropy(
            targets, logits, weights=token_weights)
        # (n x m) -> (n x m x m x 1)
        one_hot_targets = tf.expand_dims(tf.one_hot(targets, bucket_size), -1)
        # (n x m) -> ()
        n_tokens = tf.to_float(tf.reduce_sum(token_weights))

        def weighted_target_score(pair_scores):
            """Token-weighted mean of pair_scores taken at each token's target index."""
            # (n x m x m) -> (n x m x 1 x m)
            row_scores = tf.expand_dims(pair_scores, -2)
            # (n x m x 1 x m) * (n x m x m x 1) -> (n x m x 1 x 1)
            picked = tf.matmul(row_scores, one_hot_targets)
            # (n x m x 1 x 1) -> (n x m)
            picked = tf.squeeze(picked, [-1, -2])
            # the epsilon guards against a zero token count
            return tf.reduce_sum(picked*tf.to_float(token_weights)) / (n_tokens + 1e-12)

        if linearize:
            loss = loss - weighted_target_score(lin_xent)
        if distance:
            loss = loss - weighted_target_score(dist_kld)

        #-----------------------------------------------------------
        # Compute predictions/accuracy
        # (n x m x m) -> (n x m)
        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        # (n x m) (*) (n x m) -> (n x m)
        correct_tokens = nn.equal(targets, predictions) * token_weights
        # (n x m) -> (n)
        tokens_per_sequence = tf.reduce_sum(token_weights, axis=-1)
        correct_tokens_per_sequence = tf.reduce_sum(correct_tokens, axis=-1)
        # a sequence is correct when every weighted token is correct
        correct_sequences = nn.equal(tokens_per_sequence, correct_tokens_per_sequence)

    #-----------------------------------------------------------
    # Populate the output dictionary
    outputs = {
        'recur_layer': recur_layer,
        'unlabeled_targets': self.placeholder,
        'probabilities': probabilities,
        'unlabeled_loss': loss,
        'loss': loss,
        'unlabeled_predictions': predictions,
        'predictions': predictions,
        'correct_unlabeled_tokens': correct_tokens,
        'n_correct_unlabeled_tokens': tf.reduce_sum(correct_tokens),
        'n_correct_unlabeled_sequences': tf.reduce_sum(correct_sequences),
        'n_correct_tokens': tf.reduce_sum(correct_tokens),
        'n_correct_sequences': tf.reduce_sum(correct_sequences),
    }
    return outputs
def get_bilinear_discriminator(self, layer, token_weights, variable_scope=None, reuse=False):
    """Build the bilinear edge discriminator plus its loss and evaluation counts.

    The input goes through ``self.n_layers - 1`` hidden layers and a final
    two-way split projection, and the two halves are scored with an
    (optionally diagonal) bilinear discriminator. Shape notes in the comments
    follow the (n x m x m) convention of the original annotations.

    Args:
        layer: input tensor for the hidden layers; also returned untouched as
            ``outputs['recur_layer']``.
        token_weights: weights applied to the loss and multiplied into the
            predictions.
        variable_scope: optional variable scope name; ``self.classname`` is
            used when it is falsy.
        reuse: when truthy, the hidden keep probability is 1 instead of
            ``self.hidden_keep_prob``.

    Returns:
        dict holding the recurrent input, binarized targets, probabilities,
        loss, predictions and the true-positive / false-positive /
        false-negative / correct-sequence counts.
    """
    recur_layer = layer
    hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
    add_linear = self.add_linear
    with tf.variable_scope(variable_scope or self.classname):
        for i in six.moves.range(0, self.n_layers-1):
            with tf.variable_scope('FC-%d' % i):
                layer = classifiers.hidden(layer, 2*self.hidden_size,
                                           hidden_func=self.hidden_func,
                                           hidden_keep_prob=hidden_keep_prob)
        # The scope name is a plain literal. Formatting it with `% i` raised
        # TypeError (the string has no conversion specifier), or NameError when
        # the loop above never ran and `i` was unbound.
        with tf.variable_scope('FC-top'):
            layers = classifiers.hiddens(layer, 2*[self.hidden_size],
                                         hidden_func=self.hidden_func,
                                         hidden_keep_prob=hidden_keep_prob)
        layer1, layer2 = layers.pop(0), layers.pop(0)

        with tf.variable_scope('Discriminator'):
            if self.diagonal:
                logits = classifiers.diagonal_bilinear_discriminator(
                    layer1, layer2,
                    hidden_keep_prob=hidden_keep_prob,
                    add_linear=add_linear)
            else:
                logits = classifiers.bilinear_discriminator(
                    layer1, layer2,
                    hidden_keep_prob=hidden_keep_prob,
                    add_linear=add_linear)

        #-----------------------------------------------------------
        # Process the targets
        # (n x m x m) -> (n x m x m)
        # presumably an elementwise `> 0` indicator; nn.greater is a project helper
        unlabeled_targets = nn.greater(self.placeholder, 0)

        #-----------------------------------------------------------
        # Compute probabilities/cross entropy
        # (n x m x m) -> (n x m x m)
        probabilities = tf.nn.sigmoid(logits)
        # (n x m x m), (n x m x m), (n x m x m) -> ()
        loss = tf.losses.sigmoid_cross_entropy(unlabeled_targets, logits, weights=token_weights)

        #-----------------------------------------------------------
        # Compute predictions/accuracy
        # (n x m x m) -> (n x m x m); thresholds the logits at 0
        predictions = nn.greater(logits, 0, dtype=tf.int32) * token_weights
        # (n x m x m) (*) (n x m x m) -> (n x m x m)
        true_positives = predictions * unlabeled_targets
        # (n x m x m) -> ()
        n_predictions = tf.reduce_sum(predictions)
        n_targets = tf.reduce_sum(unlabeled_targets)
        n_true_positives = tf.reduce_sum(true_positives)
        # () - () -> ()
        n_false_positives = n_predictions - n_true_positives
        n_false_negatives = n_targets - n_true_positives
        # (n x m x m) -> (n)
        n_targets_per_sequence = tf.reduce_sum(unlabeled_targets, axis=[1,2])
        n_true_positives_per_sequence = tf.reduce_sum(true_positives, axis=[1,2])
        # (n) x 2 -> (); a sequence counts when its true positives equal its targets
        n_correct_sequences = tf.reduce_sum(
            nn.equal(n_true_positives_per_sequence, n_targets_per_sequence))

    #-----------------------------------------------------------
    # Populate the output dictionary
    outputs = {}
    outputs['recur_layer'] = recur_layer
    outputs['unlabeled_targets'] = unlabeled_targets
    outputs['probabilities'] = probabilities
    outputs['unlabeled_loss'] = loss
    outputs['loss'] = loss

    outputs['unlabeled_predictions'] = predictions
    outputs['n_unlabeled_true_positives'] = n_true_positives
    outputs['n_unlabeled_false_positives'] = n_false_positives
    outputs['n_unlabeled_false_negatives'] = n_false_negatives
    outputs['n_correct_unlabeled_sequences'] = n_correct_sequences
    outputs['predictions'] = predictions
    outputs['n_true_positives'] = n_true_positives
    outputs['n_false_positives'] = n_false_positives
    outputs['n_false_negatives'] = n_false_negatives
    outputs['n_correct_sequences'] = n_correct_sequences
    return outputs
def get_bilinear_discriminator(self, layer, token_weights, variable_scope=None, reuse=False):
    """Build the bilinear edge discriminator with optional linearization/distance rescoring.

    The input goes through ``self.n_layers - 1`` hidden layers and a final split
    projection, is scored with an (optionally diagonal) bilinear discriminator,
    and is optionally rescored with linearization and distance terms. Shape
    notes in the comments use n for the batch size and m for the bucket size,
    both read from ``tf.shape(layer1)``.

    Args:
        layer: input tensor for the hidden layers.
        token_weights: weights applied to the loss, the probabilities and the
            predictions.
        variable_scope: optional variable scope name; ``self.field`` is used
            when it is falsy.
        reuse: when truthy, the hidden keep probability is 1 instead of
            ``self.hidden_keep_prob``.

    Returns:
        dict holding the targets, probabilities, loss, logits, predictions and
        the true-positive / false-positive / false-negative / correct-sequence
        counts.
    """
    keep_prob = 1 if reuse else self.hidden_keep_prob
    add_linear = self.add_linear
    use_linearize = self.linearize
    use_distance = self.distance
    # Two hidden projections per enabled scorer (edges, linearization, distance).
    n_splits = 2*(1+use_linearize+use_distance)

    with tf.variable_scope(variable_scope or self.field):
        for depth in six.moves.range(0, self.n_layers-1):
            with tf.variable_scope('FC-%d' % depth):
                layer = classifiers.hidden(
                    layer, n_splits*self.hidden_size,
                    hidden_func=self.hidden_func,
                    hidden_keep_prob=keep_prob)
        with tf.variable_scope('FC-top'):
            projections = classifiers.hiddens(
                layer, n_splits*[self.hidden_size],
                hidden_func=self.hidden_func,
                hidden_keep_prob=keep_prob)
        layer1, layer2 = projections.pop(0), projections.pop(0)
        if use_linearize:
            lin_layer1, lin_layer2 = projections.pop(0), projections.pop(0)
        if use_distance:
            dist_layer1, dist_layer2 = projections.pop(0), projections.pop(0)

        with tf.variable_scope('Discriminator'):
            # The same scorer family is used for all three heads.
            if self.diagonal:
                discriminate = classifiers.diagonal_bilinear_discriminator
            else:
                discriminate = classifiers.bilinear_discriminator
            scorer_kwargs = dict(hidden_keep_prob=keep_prob, add_linear=add_linear)

            logits = discriminate(layer1, layer2, **scorer_kwargs)
            if use_linearize:
                with tf.variable_scope('Linearization'):
                    lin_logits = discriminate(lin_layer1, lin_layer2, **scorer_kwargs)
            if use_distance:
                with tf.variable_scope('Distance'):
                    dist_raw = discriminate(dist_layer1, dist_layer2, **scorer_kwargs)
                    # softplus is non-negative, so the rate is always at least 1
                    dist_lamda = 1 + tf.nn.softplus(dist_raw)

        #-----------------------------------------------------------
        # Process the targets
        # (n x m x m) -> (n x m x m)
        unlabeled_targets = self.placeholder
        layer_shape = tf.shape(layer1)
        batch_size, bucket_size = layer_shape[0], layer_shape[1]
        # (1 x m)
        ids = tf.expand_dims(tf.range(bucket_size), 0)
        # (1 x m) -> (1 x 1 x m)
        head_ids = tf.expand_dims(ids, -2)
        # (1 x m) -> (1 x m x 1)
        dep_ids = tf.expand_dims(ids, -1)

        if use_linearize:
            # (n x m x m), (1 x m x 1) -> (n x m x m); not consumed below, kept
            # so the constructed graph is unchanged
            lin_targets = tf.to_float(tf.less(unlabeled_targets, dep_ids))
            # (1 x 1 x m), (1 x m x 1) -> (n x m x m): head index < dependent index
            head_is_left = tf.tile(tf.less(head_ids, dep_ids), [batch_size, 1, 1])
            # flip the sign of the logit wherever the head index is smaller
            signed_logits = tf.where(head_is_left, -lin_logits, lin_logits)
            lin_xent = -tf.nn.softplus(signed_logits)
            # rescoring only: no gradient reaches the linearizer through the logits
            logits = logits + tf.stop_gradient(lin_xent)
        if use_distance:
            # (n x m x m) - (1 x m x 1) -> (n x m x m); not consumed below, kept
            # so the constructed graph is unchanged
            dist_targets = tf.abs(unlabeled_targets - dep_ids)
            # (1 x 1 x m) - (1 x m x 1) -> (n x m x m)
            pair_distance = tf.tile(tf.abs(head_ids - dep_ids), [batch_size, 1, 1])
            dist_ids = tf.to_float(pair_distance)+1e-12
            squared_gap = (dist_ids - dist_lamda)**2
            dist_kld = -tf.log(squared_gap/2 + 1)
            # rescoring only: no gradient reaches the distance scorer through the logits
            logits = logits + tf.stop_gradient(dist_kld)

        #-----------------------------------------------------------
        # Compute probabilities/cross entropy
        # (n x m x m) -> (n x m x m)
        probabilities = tf.nn.sigmoid(logits) * tf.to_float(token_weights)
        # (n x m x m), (n x m x m), (n x m x m) -> ()
        loss = tf.losses.sigmoid_cross_entropy(
            unlabeled_targets, logits, weights=token_weights)
        n_tokens = tf.to_float(tf.reduce_sum(token_weights))

        def weighted_target_score(pair_scores):
            """Token-weighted mean of pair_scores restricted to the target entries."""
            on_targets = pair_scores * unlabeled_targets
            # the epsilon guards against a zero token count
            return tf.reduce_sum(on_targets * tf.to_float(token_weights)) / (n_tokens + 1e-12)

        if use_linearize:
            loss = loss - weighted_target_score(lin_xent)
        if use_distance:
            loss = loss - weighted_target_score(dist_kld)

        #-----------------------------------------------------------
        # Compute predictions/accuracy
        # (n x m x m) -> (n x m x m); thresholds the logits at 0
        predictions = nn.greater(logits, 0, dtype=tf.int64) * token_weights
        # (n x m x m) (*) (n x m x m) -> (n x m x m)
        true_positives = predictions * unlabeled_targets
        # (n x m x m) -> ()
        n_predictions = tf.reduce_sum(predictions)
        n_targets = tf.reduce_sum(unlabeled_targets)
        n_true_positives = tf.reduce_sum(true_positives)
        # () - () -> ()
        n_false_positives = n_predictions - n_true_positives
        n_false_negatives = n_targets - n_true_positives
        # (n x m x m) -> (n)
        n_targets_per_sequence = tf.reduce_sum(unlabeled_targets, axis=[1,2])
        n_true_positives_per_sequence = tf.reduce_sum(true_positives, axis=[1,2])
        # (n) x 2 -> (); a sequence counts when its true positives equal its targets
        n_correct_sequences = tf.reduce_sum(
            nn.equal(n_true_positives_per_sequence, n_targets_per_sequence))

    #-----------------------------------------------------------
    # Populate the output dictionary
    outputs = {
        'unlabeled_targets': unlabeled_targets,
        'probabilities': probabilities,
        'unlabeled_loss': loss,
        'loss': loss,
        'logits': logits,
        'unlabeled_predictions': predictions,
        'n_unlabeled_true_positives': n_true_positives,
        'n_unlabeled_false_positives': n_false_positives,
        'n_unlabeled_false_negatives': n_false_negatives,
        'n_correct_unlabeled_sequences': n_correct_sequences,
        'predictions': predictions,
        'n_true_positives': n_true_positives,
        'n_false_positives': n_false_positives,
        'n_false_negatives': n_false_negatives,
        'n_correct_sequences': n_correct_sequences,
    }
    return outputs
def get_bilinear_discriminator(self, layer, token_weights, variable_scope=None, reuse=False, debug=False):
    """Build the bilinear edge discriminator, optionally exposing debug tensors.

    The input goes through ``self.n_layers - 1`` hidden layers and a final split
    projection, is scored with an (optionally diagonal) bilinear discriminator,
    and is optionally rescored with linearization and distance terms. Shape
    notes in the comments use n for the batch size and m for the bucket size,
    both read from ``tf.shape(layer1)``.

    Args:
        layer: input tensor for the hidden layers.
        token_weights: weights applied to the loss, the probabilities and the
            predictions.
        variable_scope: optional variable scope name; ``self.field`` is used
            when it is falsy.
        reuse: when truthy, the hidden keep probability is 1 instead of
            ``self.hidden_keep_prob``.
        debug: when truthy, the result additionally carries
            ``outputs['printdata']['logits']`` and ``outputs['temp_targets']``.

    Returns:
        dict holding the targets, probabilities, loss, predictions and the
        true-positive / false-positive / false-negative / correct-sequence
        counts, plus the debug entries when requested.
    """
    outputs = {}
    keep_prob = 1 if reuse else self.hidden_keep_prob
    add_linear = self.add_linear
    with_linearize = self.linearize
    with_distance = self.distance
    # Two hidden projections per enabled scorer (edges, linearization, distance).
    n_splits = 2 * (1 + with_linearize + with_distance)

    with tf.variable_scope(variable_scope or self.field):
        # Intermediate hidden layers; the loop is empty when self.n_layers <= 1.
        for layer_idx in six.moves.range(0, self.n_layers - 1):
            with tf.variable_scope('FC-%d' % layer_idx):
                layer = classifiers.hidden(
                    layer,
                    n_splits * self.hidden_size,
                    hidden_func=self.hidden_func,
                    hidden_keep_prob=keep_prob)
        # Final projection, split into n_splits pieces of size self.hidden_size.
        with tf.variable_scope('FC-top'):
            pieces = classifiers.hiddens(
                layer,
                n_splits * [self.hidden_size],
                hidden_func=self.hidden_func,
                hidden_keep_prob=keep_prob)
        layer1, layer2 = pieces.pop(0), pieces.pop(0)
        if with_linearize:
            lin_layer1, lin_layer2 = pieces.pop(0), pieces.pop(0)
        if with_distance:
            dist_layer1, dist_layer2 = pieces.pop(0), pieces.pop(0)

        with tf.variable_scope('Discriminator'):
            # The same scorer family is used for all three heads.
            score_pairs = (classifiers.diagonal_bilinear_discriminator
                           if self.diagonal
                           else classifiers.bilinear_discriminator)

            logits = score_pairs(
                layer1, layer2,
                hidden_keep_prob=keep_prob,
                add_linear=add_linear)
            if with_linearize:
                with tf.variable_scope('Linearization'):
                    lin_logits = score_pairs(
                        lin_layer1, lin_layer2,
                        hidden_keep_prob=keep_prob,
                        add_linear=add_linear)
            if with_distance:
                with tf.variable_scope('Distance'):
                    dist_scores = score_pairs(
                        dist_layer1, dist_layer2,
                        hidden_keep_prob=keep_prob,
                        add_linear=add_linear)
                    # softplus is non-negative, so the rate is always at least 1
                    dist_lamda = 1 + tf.nn.softplus(dist_scores)

        #-----------------------------------------------------------
        # Process the targets
        # (n x m x m) -> (n x m x m)
        unlabeled_targets = self.placeholder
        dims = tf.shape(layer1)
        batch_size, bucket_size = dims[0], dims[1]
        # (1 x m)
        ids = tf.expand_dims(tf.range(bucket_size), 0)
        # (1 x m) -> (1 x 1 x m)
        head_ids = tf.expand_dims(ids, -2)
        # (1 x m) -> (1 x m x 1)
        dep_ids = tf.expand_dims(ids, -1)

        if with_linearize:
            # (n x m x m), (1 x m x 1) -> (n x m x m); not consumed below, kept
            # so the constructed graph is unchanged
            lin_targets = tf.to_float(tf.less(unlabeled_targets, dep_ids))
            # (1 x 1 x m), (1 x m x 1) -> (n x m x m): head index < dependent index
            lin_ids = tf.tile(tf.less(head_ids, dep_ids), [batch_size, 1, 1])
            # flip the sign of the logit wherever the head index is smaller
            oriented_logits = tf.where(lin_ids, -lin_logits, lin_logits)
            lin_xent = -tf.nn.softplus(oriented_logits)
            # rescoring only: no gradient reaches the linearizer through the logits
            logits = logits + tf.stop_gradient(lin_xent)
        if with_distance:
            # (n x m x m) - (1 x m x 1) -> (n x m x m); not consumed below, kept
            # so the constructed graph is unchanged
            dist_targets = tf.abs(unlabeled_targets - dep_ids)
            # (1 x 1 x m) - (1 x m x 1) -> (n x m x m)
            index_gap = tf.tile(tf.abs(head_ids - dep_ids), [batch_size, 1, 1])
            dist_ids = tf.to_float(index_gap) + 1e-12
            gap_sq = (dist_ids - dist_lamda)**2
            dist_kld = -tf.log(gap_sq / 2 + 1)
            # rescoring only: no gradient reaches the distance scorer through the logits
            logits = logits + tf.stop_gradient(dist_kld)
        if debug:
            # expose the (possibly rescored) logits for inspection
            outputs['printdata'] = {'logits': logits}

        #-----------------------------------------------------------
        # Compute probabilities/cross entropy
        # (n x m x m) -> (n x m x m)
        probabilities = tf.nn.sigmoid(logits) * tf.to_float(token_weights)
        # (n x m x m), (n x m x m), (n x m x m) -> ()
        loss = tf.losses.sigmoid_cross_entropy(
            unlabeled_targets, logits, weights=token_weights)
        n_tokens = tf.to_float(tf.reduce_sum(token_weights))

        def target_weighted_mean(pair_scores):
            """Token-weighted mean of pair_scores restricted to the target entries."""
            masked = pair_scores * unlabeled_targets
            # the epsilon guards against a zero token count
            return tf.reduce_sum(masked * tf.to_float(token_weights)) / (n_tokens + 1e-12)

        if with_linearize:
            loss = loss - target_weighted_mean(lin_xent)
        if with_distance:
            loss = loss - target_weighted_mean(dist_kld)

        #-----------------------------------------------------------
        # Compute predictions/accuracy (counts for precision/recall)
        # (n x m x m) -> (n x m x m); thresholds the logits at 0
        predictions = nn.greater(logits, 0, dtype=tf.int32) * token_weights
        # (n x m x m) (*) (n x m x m) -> (n x m x m)
        true_positives = predictions * unlabeled_targets
        # (n x m x m) -> ()
        n_predictions = tf.reduce_sum(predictions)
        n_targets = tf.reduce_sum(unlabeled_targets)
        n_true_positives = tf.reduce_sum(true_positives)
        # () - () -> ()
        n_false_positives = n_predictions - n_true_positives
        n_false_negatives = n_targets - n_true_positives
        # (n x m x m) -> (n)
        n_targets_per_sequence = tf.reduce_sum(unlabeled_targets, axis=[1, 2])
        n_true_positives_per_sequence = tf.reduce_sum(true_positives, axis=[1, 2])
        # (n) x 2 -> (); a sequence counts when its true positives equal its targets
        n_correct_sequences = tf.reduce_sum(
            nn.equal(n_true_positives_per_sequence, n_targets_per_sequence))

    #-----------------------------------------------------------
    # Populate the output dictionary
    outputs['unlabeled_targets'] = unlabeled_targets
    outputs['probabilities'] = probabilities
    outputs['unlabeled_loss'] = loss
    outputs['loss'] = loss
    if debug:
        # index of the largest target entry along the last axis
        outputs['temp_targets'] = tf.argmax(
            unlabeled_targets, axis=-1, output_type=tf.int32)

    outputs.update([
        ('unlabeled_predictions', predictions),
        ('n_unlabeled_true_positives', n_true_positives),
        ('n_unlabeled_false_positives', n_false_positives),
        ('n_unlabeled_false_negatives', n_false_negatives),
        ('n_correct_unlabeled_sequences', n_correct_sequences),
        ('predictions', predictions),
        ('n_true_positives', n_true_positives),
        ('n_false_positives', n_false_positives),
        ('n_false_negatives', n_false_negatives),
        ('n_correct_sequences', n_correct_sequences),
    ])
    return outputs