def _compute_rezoom_loss(hypes, rezoom_loss_input, slow=False):
    """
    Computes loss for delta output. Only relevant if rezoom layers are used.
    """
    grid_size = hypes['grid_width'] * hypes['grid_height']
    outer_size = grid_size * hypes['batch_size']
    head = hypes['solver']['head_weights']
    regression_weights = hypes['solver']['regression_weights']

    perm_truth, depths_truth, locations_truth, pred_boxes, classes, \
        pred_mask, pred_depths, pred_locations, \
        pred_confs_deltas, pred_boxes_deltas, \
        pred_depths_deltas, pred_locations_deltas, mask_r = rezoom_loss_input

    if hypes['rezoom_change_loss'] == 'center':
        error = (perm_truth[:, :, 0:2] - pred_boxes[:, :, 0:2]) \
            / tf.maximum(perm_truth[:, :, 2:4], 1.)
        square_error = tf.reduce_sum(tf.square(error), 2)
        inside = tf.reshape(tf.to_int64(
            tf.logical_and(tf.less(square_error, 0.2**2),
                           tf.greater(classes, 0))), [-1])
    elif hypes['rezoom_change_loss'] == 'iou':
        pred_boxes_flat = tf.reshape(pred_boxes, [-1, 4])
        perm_truth_flat = tf.reshape(perm_truth, [-1, 4])
        iou = train_utils.iou(train_utils.to_x1y1x2y2(pred_boxes_flat),
                              train_utils.to_x1y1x2y2(perm_truth_flat))
        inside = tf.reshape(tf.to_int64(tf.greater(iou, 0.5)), [-1])
    else:
        assert not hypes['rezoom_change_loss']
        inside = tf.reshape(tf.to_int64(tf.greater(classes, 0)), [-1])

    # Confidence loss on the rezoom (delta) predictions, masked to ignore
    # don't-care areas.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=pred_confs_deltas, labels=inside)
    delta_confs_loss = tf.reduce_sum(cross_entropy * mask_r) \
        / outer_size * hypes['solver']['head_weights'][0] * 0.1

    # Box regression loss on the delta-corrected boxes, with per-coordinate
    # squared residuals clipped at 10.**2.
    delta_unshaped = perm_truth - (pred_boxes + pred_boxes_deltas)
    delta_residual = tf.reshape(delta_unshaped * pred_mask,
                                [outer_size, hypes['rnn_len'], 4])
    sqrt_delta = tf.minimum(tf.square(delta_residual), 10.**2)
    delta_boxes_loss = tf.reduce_sum(sqrt_delta) / outer_size * head[1] * 0.05

    pred_mask = tf.squeeze(pred_mask, 2)

    """
    delta_depths_unshaped = (depths_truth - (pred_depths + pred_depths_deltas)) \
        / (depths_truth + 1e-5)
    delta_depths_residual = tf.reshape(delta_depths_unshaped * pred_mask,
                                       [outer_size, hypes['rnn_len'], 1])
    sqrt_delta_depths = tf.abs(delta_depths_residual)
    delta_depths_loss = (tf.reduce_sum(sqrt_delta_depths)
                         / (1e-5 + tf.reduce_sum(pred_mask))) \
        * head[1] * 0.1 * regression_weights[0]
    """

    delta_depths_loss = tf.reduce_sum(
        tf.abs(depths_truth - (pred_depths + pred_depths_deltas))
        * tf.reshape(pred_mask, (outer_size, 1))) * 0.05 / outer_size
    delta_locations_loss = tf.reduce_sum(
        tf.abs(locations_truth - (pred_locations + pred_locations_deltas))
        * pred_mask) * 0.05 / outer_size

    return delta_confs_loss, delta_boxes_loss, \
        delta_depths_loss, delta_locations_loss

def _compute_rezoom_loss(hypes, rezoom_loss_input):
    """
    Computes loss for delta output. Only relevant if rezoom layers are used.
    """
    grid_size = hypes['grid_width'] * hypes['grid_height']
    outer_size = grid_size * hypes['batch_size']
    head = hypes['solver']['head_weights']

    perm_truth, pred_boxes, classes, pred_mask, \
        pred_confs_deltas, pred_boxes_deltas, mask_r = rezoom_loss_input

    if hypes['rezoom_change_loss'] == 'center':
        inside = tf.reshape(tf.to_int64(classes), [-1])
    elif hypes['rezoom_change_loss'] == 'iou':
        pred_boxes_flat = tf.reshape(pred_boxes, [-1, 4])
        perm_truth_flat = tf.reshape(perm_truth, [-1, 4])
        iou = train_utils.iou(train_utils.to_x1y1x2y2(pred_boxes_flat),
                              train_utils.to_x1y1x2y2(perm_truth_flat))
        inside = tf.reshape(tf.to_int64(tf.greater(iou, 0.5)), [-1])
    else:
        assert not hypes['rezoom_change_loss']
        inside = tf.reshape(tf.to_int64(tf.greater(classes, 0)), [-1])

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=pred_confs_deltas, labels=inside)
    delta_confs_loss = tf.reduce_sum(cross_entropy * mask_r) \
        / outer_size * hypes['solver']['head_weights'][0] * 0.1

    delta_unshaped = perm_truth - (pred_boxes + pred_boxes_deltas)
    delta_residual = tf.reshape(delta_unshaped * pred_mask,
                                [outer_size, hypes['rnn_len'], 4])
    sqrt_delta = tf.minimum(tf.square(delta_residual), 10.**2)
    delta_boxes_loss = (tf.reduce_sum(sqrt_delta)
                        / outer_size * head[1] * 0.03)

    return delta_confs_loss, delta_boxes_loss

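
# A minimal sketch of the hyperparameter keys that both _compute_rezoom_loss
# variants above read. The values are illustrative placeholders (assumptions),
# not project defaults; the extended variant additionally expects
# hypes['solver']['regression_weights'].
example_hypes = {
    'grid_width': 20,
    'grid_height': 15,
    'batch_size': 1,
    'rnn_len': 1,
    'rezoom_change_loss': 'center',      # 'center', 'iou', or a falsy value
    'solver': {
        'head_weights': [1.0, 0.1],      # [confidence weight, box weight]
    },
}
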
def build_forward_backward(H, x, phase, boxes, flags):
    '''
    Call build_forward() and then setup the loss functions
    '''
    grid_size = H['grid_width'] * H['grid_height']
    outer_size = grid_size * H['batch_size']
    reuse = {'train': None, 'test': True}[phase]
    if H['use_rezoom']:
        (pred_boxes, pred_logits, pred_confidences, pred_confs_deltas,
         pred_boxes_deltas) = build_forward(H, x, phase, reuse)
    else:
        pred_boxes, pred_logits, pred_confidences = build_forward(
            H, x, phase, reuse)
    with tf.variable_scope('decoder', reuse={
            'train': None, 'test': True}[phase]):
        outer_boxes = tf.reshape(boxes, [outer_size, H['rnn_len'], 4])
        outer_flags = tf.cast(tf.reshape(flags, [outer_size, H['rnn_len']]),
                              'int32')
        if H['use_lstm']:
            hungarian_module = tf.load_op_library(
                'utils/hungarian/hungarian.so')
            assignments, classes, perm_truth, pred_mask = (
                hungarian_module.hungarian(pred_boxes, outer_boxes,
                                           outer_flags,
                                           H['solver']['hungarian_iou']))
        else:
            classes = tf.reshape(flags, (outer_size, 1))
            perm_truth = tf.reshape(outer_boxes, (outer_size, 1, 4))
            pred_mask = tf.reshape(tf.cast(tf.greater(classes, 0), 'float32'),
                                   (outer_size, 1, 1))
        true_classes = tf.reshape(tf.cast(tf.greater(classes, 0), 'int64'),
                                  [outer_size * H['rnn_len']])
        pred_logit_r = tf.reshape(
            pred_logits, [outer_size * H['rnn_len'], H['num_classes']])
        confidences_loss = (tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=pred_logit_r, labels=true_classes))
            ) / outer_size * H['solver']['head_weights'][0]
        residual = tf.reshape(perm_truth - pred_boxes * pred_mask,
                              [outer_size, H['rnn_len'], 4])
        boxes_loss = tf.reduce_sum(
            tf.abs(residual)) / outer_size * H['solver']['head_weights'][1]
        if H['use_rezoom']:
            if H['rezoom_change_loss'] == 'center':
                error = (perm_truth[:, :, 0:2]
                         - pred_boxes[:, :, 0:2]) / tf.maximum(
                             perm_truth[:, :, 2:4], 1.)
                square_error = tf.reduce_sum(tf.square(error), 2)
                inside = tf.reshape(
                    tf.to_int64(
                        tf.logical_and(tf.less(square_error, 0.2**2),
                                       tf.greater(classes, 0))), [-1])
            elif H['rezoom_change_loss'] == 'iou':
                iou = train_utils.iou(
                    train_utils.to_x1y1x2y2(tf.reshape(pred_boxes, [-1, 4])),
                    train_utils.to_x1y1x2y2(tf.reshape(perm_truth, [-1, 4])))
                inside = tf.reshape(tf.to_int64(tf.greater(iou, 0.5)), [-1])
            else:
                assert H['rezoom_change_loss'] == False
                inside = tf.reshape(tf.to_int64(tf.greater(classes, 0)), [-1])

            new_confs = tf.reshape(
                pred_confs_deltas,
                [outer_size * H['rnn_len'], H['num_classes']])
            delta_confs_loss = tf.reduce_sum(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=new_confs, labels=inside)
                ) / outer_size * H['solver']['head_weights'][0] * 0.1

            pred_logits_squash = tf.reshape(
                new_confs, [outer_size * H['rnn_len'], H['num_classes']])
            pred_confidences_squash = tf.nn.softmax(pred_logits_squash)
            pred_confidences = tf.reshape(
                pred_confidences_squash,
                [outer_size, H['rnn_len'], H['num_classes']])

            loss = confidences_loss + boxes_loss + delta_confs_loss
            if H['reregress']:
                delta_residual = tf.reshape(
                    perm_truth - (pred_boxes + pred_boxes_deltas) * pred_mask,
                    [outer_size, H['rnn_len'], 4])
                delta_boxes_loss = (tf.reduce_sum(
                    tf.minimum(tf.square(delta_residual), 10.**2))
                    / outer_size * H['solver']['head_weights'][1] * 0.03)
                boxes_loss = delta_boxes_loss

                tf.summary.histogram(phase + '/delta_hist0_x',
                                     pred_boxes_deltas[:, 0, 0])
                tf.summary.histogram(phase + '/delta_hist0_y',
                                     pred_boxes_deltas[:, 0, 1])
                tf.summary.histogram(phase + '/delta_hist0_w',
                                     pred_boxes_deltas[:, 0, 2])
                tf.summary.histogram(phase + '/delta_hist0_h',
                                     pred_boxes_deltas[:, 0, 3])
                loss += delta_boxes_loss
        else:
            loss = confidences_loss + boxes_loss

    return pred_boxes, pred_confidences, loss, confidences_loss, boxes_loss

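
# Hedged usage sketch for the build_forward_backward variant above: the phase
# string selects variable reuse ('train' builds the variables, 'test' reuses
# them). The tensors x, boxes and flags are assumed to come from the input
# pipeline; the names below are illustrative.
x, boxes, flags = inputs['image'], inputs['boxes'], inputs['flags']
(pred_boxes, pred_confidences, total_loss,
 confidences_loss, boxes_loss) = build_forward_backward(
    H, x, 'train', boxes, flags)
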
def build_forward_backward(H, x, googlenet, phase, boxes, flags):
    '''
    Call build_forward() and then setup the loss functions
    '''
    grid_size = H['grid_width'] * H['grid_height']
    outer_size = grid_size * H['batch_size']
    reuse = {'train': None, 'test': True}[phase]
    if H['use_rezoom']:
        (pred_boxes, pred_logits, pred_confidences, pred_confs_deltas,
         pred_boxes_deltas) = build_forward(H, x, googlenet, phase, reuse)
    else:
        pred_boxes, pred_logits, pred_confidences = build_forward(
            H, x, googlenet, phase, reuse)
    with tf.variable_scope('decoder',
                           reuse={'train': None, 'test': True}[phase]):
        outer_boxes = tf.reshape(boxes, [outer_size, H['rnn_len'], 4])
        outer_flags = tf.cast(tf.reshape(flags, [outer_size, H['rnn_len']]),
                              'int32')
        if H['use_lstm']:
            assignments, classes, perm_truth, pred_mask = (
                tf.user_ops.hungarian(pred_boxes, outer_boxes, outer_flags,
                                      H['solver']['hungarian_iou']))
        else:
            classes = tf.reshape(flags, (outer_size, 1))
            perm_truth = tf.reshape(outer_boxes, (outer_size, 1, 4))
            pred_mask = tf.reshape(tf.cast(tf.greater(classes, 0), 'float32'),
                                   (outer_size, 1, 1))
        true_classes = tf.reshape(tf.cast(tf.greater(classes, 0), 'int64'),
                                  [outer_size * H['rnn_len']])
        pred_logit_r = tf.reshape(
            pred_logits, [outer_size * H['rnn_len'], H['num_classes']])
        confidences_loss = (tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(pred_logit_r,
                                                           true_classes))
            ) / outer_size * H['solver']['head_weights'][0]
        residual = tf.reshape(perm_truth - pred_boxes * pred_mask,
                              [outer_size, H['rnn_len'], 4])
        boxes_loss = tf.reduce_sum(tf.abs(residual)) \
            / outer_size * H['solver']['head_weights'][1]
        if H['use_rezoom']:
            if H['rezoom_change_loss'] == 'center':
                error = (perm_truth[:, :, 0:2] - pred_boxes[:, :, 0:2]) \
                    / tf.maximum(perm_truth[:, :, 2:4], 1.)
                square_error = tf.reduce_sum(tf.square(error), 2)
                inside = tf.reshape(
                    tf.to_int64(tf.logical_and(tf.less(square_error, 0.2**2),
                                               tf.greater(classes, 0))), [-1])
            elif H['rezoom_change_loss'] == 'iou':
                iou = train_utils.iou(
                    train_utils.to_x1y1x2y2(tf.reshape(pred_boxes, [-1, 4])),
                    train_utils.to_x1y1x2y2(tf.reshape(perm_truth, [-1, 4])))
                inside = tf.reshape(tf.to_int64(tf.greater(iou, 0.5)), [-1])
            else:
                assert H['rezoom_change_loss'] == False
                inside = tf.reshape(tf.to_int64(tf.greater(classes, 0)), [-1])
            new_confs = tf.reshape(
                pred_confs_deltas,
                [outer_size * H['rnn_len'], H['num_classes']])
            delta_confs_loss = tf.reduce_sum(
                tf.nn.sparse_softmax_cross_entropy_with_logits(new_confs,
                                                               inside)) \
                / outer_size * H['solver']['head_weights'][0] * 0.1
            pred_logits_squash = tf.reshape(
                new_confs, [outer_size * H['rnn_len'], H['num_classes']])
            pred_confidences_squash = tf.nn.softmax(pred_logits_squash)
            pred_confidences = tf.reshape(
                pred_confidences_squash,
                [outer_size, H['rnn_len'], H['num_classes']])
            loss = confidences_loss + boxes_loss + delta_confs_loss
            if H['reregress']:
                delta_residual = tf.reshape(
                    perm_truth - (pred_boxes + pred_boxes_deltas) * pred_mask,
                    [outer_size, H['rnn_len'], 4])
                delta_boxes_loss = (tf.reduce_sum(
                    tf.minimum(tf.square(delta_residual), 10.**2))
                    / outer_size * H['solver']['head_weights'][1] * 0.03)
                boxes_loss = delta_boxes_loss
                tf.histogram_summary(phase + '/delta_hist0_x',
                                     pred_boxes_deltas[:, 0, 0])
                tf.histogram_summary(phase + '/delta_hist0_y',
                                     pred_boxes_deltas[:, 0, 1])
                tf.histogram_summary(phase + '/delta_hist0_w',
                                     pred_boxes_deltas[:, 0, 2])
                tf.histogram_summary(phase + '/delta_hist0_h',
                                     pred_boxes_deltas[:, 0, 3])
                loss += delta_boxes_loss
        else:
            loss = confidences_loss + boxes_loss

    return pred_boxes, pred_confidences, loss, confidences_loss, boxes_loss

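
# Note on the variant above: it targets the pre-1.0 TensorFlow API. A sketch
# of the renames applied in the newer build_forward_backward shown earlier
# (the hungarian op is the project's custom op, loaded from
# utils/hungarian/hungarian.so in the newer code):
#
#   tf.histogram_summary(tag, values)
#       -> tf.summary.histogram(tag, values)
#   tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels)   # positional
#       -> tf.nn.sparse_softmax_cross_entropy_with_logits(logits=..., labels=...)
#   tf.user_ops.hungarian(...)
#       -> hungarian_module = tf.load_op_library('utils/hungarian/hungarian.so')
#          hungarian_module.hungarian(...)
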
perm_truth, pred_boxes, classes, pred_mask, \
    pred_confs_deltas, pred_boxes_deltas, mask_r = rezoom_loss_input

# If rezoom layers are used, decide which predictions count as "inside"
# their matched ground truth, depending on the configured rezoom change loss.
if hypes['rezoom_change_loss'] == 'center':
    # Centre-distance criterion, normalised by the ground-truth box size.
    error = (perm_truth[:, :, 0:2] - pred_boxes[:, :, 0:2]) \
        / tf.maximum(perm_truth[:, :, 2:4], 1.)
    square_error = tf.reduce_sum(tf.square(error), 2)
    inside = tf.reshape(tf.to_int64(
        tf.logical_and(tf.less(square_error, 0.2**2),
                       tf.greater(classes, 0))), [-1])
elif hypes['rezoom_change_loss'] == 'iou':
    # Overlap criterion: a prediction counts if its IoU with the matched
    # ground truth exceeds 0.5.
    pred_boxes_flat = tf.reshape(pred_boxes, [-1, 4])
    perm_truth_flat = tf.reshape(perm_truth, [-1, 4])
    iou = train_utils.iou(train_utils.to_x1y1x2y2(pred_boxes_flat),
                          train_utils.to_x1y1x2y2(perm_truth_flat))
    inside = tf.reshape(tf.to_int64(tf.greater(iou, 0.5)), [-1])
else:
    # No rezoom change loss configured: fall back to the class labels.
    assert not hypes['rezoom_change_loss']
    inside = tf.reshape(tf.to_int64(tf.greater(classes, 0)), [-1])

# Cross-entropy between the rezoom confidence deltas and the "inside" labels,
# masked to ignore don't-care areas.
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=pred_confs_deltas, labels=inside)
delta_confs_loss = tf.reduce_sum(cross_entropy * mask_r) \
    / outer_size * hypes['solver']['head_weights'][0] * 0.1

delta_unshaped = perm_truth - (pred_boxes + pred_boxes_deltas)

def loss(hypes, decoded_logits, labels):
    """Calculate the loss from the logits and the labels.

    Args:
      decoded_logits: output of decoder
      labels: Labels tensor; Output from data_input
      flags: 0 if object is present, 1 otherwise
      confidences: ??
      boxes: encoding of bounding box location

    Returns:
      loss: Loss tensor of type float.
    """
    confidences, boxes, mask = labels

    pred_boxes = decoded_logits['pred_boxes']
    pred_logits = decoded_logits['pred_logits']
    pred_confidences = decoded_logits['pred_confidences']

    pred_confs_deltas = decoded_logits['pred_confs_deltas']
    pred_boxes_deltas = decoded_logits['pred_boxes_deltas']

    grid_size = hypes['grid_width'] * hypes['grid_height']
    outer_size = grid_size * hypes['batch_size']
    head = hypes['solver']['head_weights']

    # Compute confidence loss
    confidences = tf.reshape(confidences, (outer_size, 1))
    true_classes = tf.reshape(tf.cast(tf.greater(confidences, 0), 'int64'),
                              [outer_size])

    pred_classes = tf.reshape(pred_logits, [outer_size, hypes['num_classes']])
    mask_r = tf.reshape(mask, [outer_size])

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=pred_classes, labels=true_classes)

    # ignore don't care areas
    cross_entropy_sum = (tf.reduce_sum(mask_r * cross_entropy))
    confidences_loss = cross_entropy_sum / outer_size * head[0]

    true_boxes = tf.reshape(boxes, (outer_size, hypes['rnn_len'], 4))

    # box loss for background prediction needs to be zeroed out
    boxes_mask = tf.reshape(
        tf.cast(tf.greater(confidences, 0), 'float32'), (outer_size, 1, 1))

    # danger zone
    # residual = (true_boxes - pred_boxes) * boxes_mask
    # boxes_loss = tf.reduce_sum(tf.abs(residual)) / outer_size * head[1]

    pred_boxes_flat = tf.reshape(pred_boxes * boxes_mask, [-1, 4])
    perm_truth_flat = tf.reshape(true_boxes, [-1, 4])
    iou = train_utils.iou(train_utils.to_x1y1x2y2(pred_boxes_flat),
                          train_utils.to_x1y1x2y2(perm_truth_flat))
    boxes_loss = -tf.reduce_sum(tf.log(tf.maximum(iou, 1e-3))) \
        / (tf.reduce_sum(boxes_mask) + 1e-6)

    if hypes['use_rezoom']:
        # add rezoom loss
        rezoom_loss_input = true_boxes, pred_boxes, confidences, boxes_mask, \
            pred_confs_deltas, pred_boxes_deltas, mask_r

        delta_confs_loss, delta_boxes_loss = _compute_rezoom_loss(
            hypes, rezoom_loss_input)

        _add_rezoom_loss_histograms(hypes, pred_boxes_deltas)

        loss = confidences_loss + boxes_loss + delta_boxes_loss \
            + delta_confs_loss
    else:
        loss = confidences_loss + boxes_loss

    tf.add_to_collection('total_losses', loss)

    reg_loss_col = tf.GraphKeys.REGULARIZATION_LOSSES

    decoder_reg_loss = tf.add_n(tf.get_collection(reg_loss_col,
                                                  scope='decoder'),
                                name='decoder_reg_loss')
    weight_loss = tf.add_n(tf.get_collection(reg_loss_col, scope=''),
                           name='reg_loss')
    encoder_reg_loss = tf.subtract(weight_loss, decoder_reg_loss,
                                   name='encoder_reg_loss')

    total_loss = weight_loss + loss

    tf.summary.scalar('/weights', weight_loss)
    tf.summary.scalar('/decoder_reg_loss', decoder_reg_loss)
    tf.summary.scalar('/encoder_reg_loss', encoder_reg_loss)

    losses = {}
    losses['total_loss'] = total_loss
    losses['decoder_reg_loss'] = decoder_reg_loss
    losses['encoder_reg_loss'] = encoder_reg_loss
    losses['loss'] = loss
    losses['confidences_loss'] = confidences_loss
    losses['boxes_loss'] = boxes_loss
    losses['weight_loss'] = weight_loss
    if hypes['use_rezoom']:
        losses['delta_boxes_loss'] = delta_boxes_loss
        losses['delta_confs_loss'] = delta_confs_loss

    return losses

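
# Hedged usage sketch: how the dict returned by loss() is typically consumed
# in a TF 1.x training script. The learning rate and the decoded_logits /
# labels tensors are assumptions for illustration, not project defaults.
losses = loss(hypes, decoded_logits, labels)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-5)
train_op = optimizer.minimize(losses['total_loss'])
tf.summary.scalar('loss/total', losses['total_loss'])
tf.summary.scalar('loss/confidences', losses['confidences_loss'])
tf.summary.scalar('loss/boxes', losses['boxes_loss'])
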
def loss(hypes, decoded_logits, labels):
    """Calculate the loss from the logits and the labels.

    Args:
      decoded_logits: output of decoder
      labels: Labels tensor; Output from data_input

    Returns:
      loss: Loss tensor of type float.
    """
    flags, confidences, boxes = labels

    pred_boxes = decoded_logits['pred_boxes']
    pred_logits = decoded_logits['pred_logits']
    pred_confidences = decoded_logits['pred_confidences']

    pred_confs_deltas = decoded_logits['pred_confs_deltas']
    pred_boxes_deltas = decoded_logits['pred_boxes_deltas']

    grid_size = hypes['grid_width'] * hypes['grid_height']
    outer_size = grid_size * hypes['batch_size']

    with tf.variable_scope('decoder'):
        outer_boxes = tf.reshape(boxes, [outer_size, hypes['rnn_len'], 4])
        outer_flags = tf.cast(
            tf.reshape(flags, [outer_size, hypes['rnn_len']]), 'int32')
        if hypes['use_lstm']:
            assignments, classes, perm_truth, pred_mask = (
                tf.user_ops.hungarian(pred_boxes, outer_boxes, outer_flags,
                                      hypes['solver']['hungarian_iou']))
        else:
            classes = tf.reshape(flags, (outer_size, 1))
            perm_truth = tf.reshape(outer_boxes, (outer_size, 1, 4))
            pred_mask = tf.reshape(
                tf.cast(tf.greater(classes, 0), 'float32'),
                (outer_size, 1, 1))
        true_classes = tf.reshape(tf.cast(tf.greater(classes, 0), 'int64'),
                                  [outer_size * hypes['rnn_len']])

        pred_logit_r = tf.reshape(
            pred_logits,
            [outer_size * hypes['rnn_len'], hypes['num_classes']])

        grid_size = hypes['grid_width'] * hypes['grid_height']
        outer_size = grid_size * hypes['batch_size']

        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=pred_logit_r, labels=true_classes)

        cross_entropy_sum = (tf.reduce_sum(cross_entropy))

        head = hypes['solver']['head_weights']
        confidences_loss = cross_entropy_sum / outer_size * head[0]

        residual = tf.reshape(perm_truth - pred_boxes * pred_mask,
                              [outer_size, hypes['rnn_len'], 4])

        boxes_loss = tf.reduce_sum(tf.abs(residual)) / outer_size * head[1]

        if hypes['use_rezoom']:
            if hypes['rezoom_change_loss'] == 'center':
                error = (perm_truth[:, :, 0:2] - pred_boxes[:, :, 0:2]) \
                    / tf.maximum(perm_truth[:, :, 2:4], 1.)
                square_error = tf.reduce_sum(tf.square(error), 2)
                inside = tf.reshape(
                    tf.to_int64(
                        tf.logical_and(tf.less(square_error, 0.2**2),
                                       tf.greater(classes, 0))), [-1])
            elif hypes['rezoom_change_loss'] == 'iou':
                pred_boxes_flat = tf.reshape(pred_boxes, [-1, 4])
                perm_truth_flat = tf.reshape(perm_truth, [-1, 4])
                iou = train_utils.iou(
                    train_utils.to_x1y1x2y2(pred_boxes_flat),
                    train_utils.to_x1y1x2y2(perm_truth_flat))
                inside = tf.reshape(tf.to_int64(tf.greater(iou, 0.5)), [-1])
            else:
                assert not hypes['rezoom_change_loss']
                inside = tf.reshape(tf.to_int64(tf.greater(classes, 0)), [-1])

            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=pred_confs_deltas, labels=inside)

            delta_confs_loss = tf.reduce_sum(cross_entropy) \
                / outer_size * hypes['solver']['head_weights'][0] * 0.1

            loss = confidences_loss + boxes_loss + delta_confs_loss

            if hypes['reregress']:
                delta_unshaped = perm_truth - (pred_boxes + pred_boxes_deltas)

                delta_residual = tf.reshape(
                    delta_unshaped * pred_mask,
                    [outer_size, hypes['rnn_len'], 4])
                sqrt_delta = tf.minimum(tf.square(delta_residual), 10.**2)
                delta_boxes_loss = (tf.reduce_sum(sqrt_delta)
                                    / outer_size * head[1] * 0.03)
                # boxes_loss = delta_boxes_loss

                tf.summary.histogram('/delta_hist0_x',
                                     pred_boxes_deltas[:, 0, 0])
                tf.summary.histogram('/delta_hist0_y',
                                     pred_boxes_deltas[:, 0, 1])
                tf.summary.histogram('/delta_hist0_w',
                                     pred_boxes_deltas[:, 0, 2])
                tf.summary.histogram('/delta_hist0_h',
                                     pred_boxes_deltas[:, 0, 3])
                loss += delta_boxes_loss
        else:
            loss = confidences_loss + boxes_loss

        # tf.add_to_collection('losses', loss)
        tf.add_to_collection('total_losses', loss)

    weight_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')

    total_loss = weight_loss + loss

    losses = {}
    losses['total_loss'] = total_loss
    losses['loss'] = loss
    losses['confidences_loss'] = confidences_loss
    losses['boxes_loss'] = boxes_loss
    losses['weight_loss'] = weight_loss
    if hypes['reregress']:
        losses['delta_boxes_loss'] = delta_boxes_loss
        losses['delta_confs_loss'] = delta_confs_loss

    return losses

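
# Illustrative sanity check (assumed values) for the clipped squared residual
# used by the reregress branch above: tf.minimum(tf.square(delta), 10.**2)
# caps each coordinate's squared error at 100, so one badly matched box
# cannot dominate delta_boxes_loss. NumPy mirrors the element-wise behaviour.
import numpy as np

delta = np.array([[0.5, -2.0, 30.0, -0.1]])
clipped = np.minimum(np.square(delta), 10.0 ** 2)
# clipped == [[0.25, 4.0, 100.0, 0.01]]
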