Example #1
0
def xdet_model_fn(features, labels, mode, params):
    """Our model_fn for ResNet to be used with our Estimator."""
    num_anchors_list = labels['num_anchors_list']
    num_feature_layers = len(num_anchors_list)

    shape = labels['targets'][-1]
    glabels = labels['targets'][:num_feature_layers][0]
    gtargets = labels['targets'][num_feature_layers:2 * num_feature_layers][0]
    gscores = labels['targets'][2 * num_feature_layers:3 *
                                num_feature_layers][0]

    with tf.variable_scope(params['model_scope'],
                           default_name=None,
                           values=[features],
                           reuse=tf.AUTO_REUSE):
        backbone = xdet_body_v2.xdet_resnet_v2(params['resnet_size'],
                                               params['data_format'])
        body_cls_output, body_regress_output = backbone(
            inputs=features, is_training=(mode == tf.estimator.ModeKeys.TRAIN))

        cls_pred, location_pred = xdet_body_v2.xdet_head(
            body_cls_output,
            body_regress_output,
            params['num_classes'],
            num_anchors_list[0], (mode == tf.estimator.ModeKeys.TRAIN),
            data_format=params['data_format'])

    if params['data_format'] == 'channels_first':
        cls_pred = tf.transpose(cls_pred, [0, 2, 3, 1])
        location_pred = tf.transpose(location_pred, [0, 2, 3, 1])

    bboxes_pred = labels['decode_fn'](
        location_pred
    )  #(tf.reshape(location_pred, tf.shape(location_pred).as_list()[0:-1] + [-1, 4]))

    cls_pred = tf.reshape(cls_pred, [-1, params['num_classes']])
    location_pred = tf.reshape(location_pred, [-1, 4])
    glabels = tf.reshape(glabels, [-1])
    gscores = tf.reshape(gscores, [-1])
    gtargets = tf.reshape(gtargets, [-1, 4])

    # raw mask for positive > 0.5, and for negetive < 0.3
    # each positive examples has one label
    positive_mask = glabels > 0  #tf.logical_and(glabels > 0, gscores > params['match_threshold'])
    fpositive_mask = tf.cast(positive_mask, tf.float32)
    n_positives = tf.reduce_sum(fpositive_mask)
    # negtive examples are those max_overlap is still lower than neg_threshold, note that some positive may also has lower jaccard
    # note those gscores is 0 is either be ignored during anchors encode or anchors have 0 overlap with all ground truth
    #negtive_mask = tf.logical_and(tf.logical_and(tf.logical_not(tf.logical_or(positive_mask, glabels < 0)), gscores < params['neg_threshold']), gscores > 0.)
    #gscores = tf.Print(gscores, [tf.reduce_sum(tf.cast(gscores > 0., tf.float32))])
    #glabels = tf.Print(glabels, [glabels, tf.reduce_sum(tf.cast(tf.equal(glabels, 0), tf.float32))], message='glabels: ', summarize=1000)

    negtive_mask = tf.logical_and(tf.equal(glabels, 0), gscores > 0.)
    #negtive_mask = tf.Print(negtive_mask, [tf.reduce_sum(tf.cast(negtive_mask, tf.float32))])
    #negtive_mask = tf.logical_and(tf.logical_and(tf.logical_not(positive_mask), gscores < params['neg_threshold']), gscores > 0.)
    #negtive_mask = tf.logical_and(gscores < params['neg_threshold'], tf.logical_not(positive_mask))
    fnegtive_mask = tf.cast(negtive_mask, tf.float32)
    n_negtives = tf.reduce_sum(fnegtive_mask)

    n_neg_to_select = tf.cast(params['negative_ratio'] * n_positives, tf.int32)
    n_neg_to_select = tf.minimum(n_neg_to_select,
                                 tf.cast(n_negtives, tf.int32))

    # hard negative mining for classification
    predictions_for_bg = tf.nn.softmax(cls_pred)[:, 0]
    prob_for_negtives = tf.where(
        negtive_mask,
        0. - predictions_for_bg,
        # ignore all the positives
        0. - tf.ones_like(predictions_for_bg))
    topk_prob_for_bg, _ = tf.nn.top_k(prob_for_negtives, k=n_neg_to_select)
    selected_neg_mask = prob_for_negtives > topk_prob_for_bg[-1]

    # # random select negtive examples for classification
    # selected_neg_mask = tf.random_uniform(tf.shape(gscores), minval=0, maxval=1.) < tf.where(
    #                                                                                     tf.greater(n_negtives, 0),
    #                                                                                     tf.divide(tf.cast(n_neg_to_select, tf.float32), n_negtives),
    #                                                                                     tf.zeros_like(tf.cast(n_neg_to_select, tf.float32)),
    #                                                                                     name='rand_select_negtive')

    # include both selected negtive and all positive examples
    final_mask = tf.stop_gradient(
        tf.logical_or(tf.logical_and(negtive_mask, selected_neg_mask),
                      positive_mask))
    total_examples = tf.reduce_sum(tf.cast(final_mask, tf.float32))

    # add mask for glabels and cls_pred here
    glabels = tf.boolean_mask(tf.clip_by_value(glabels, 0, FLAGS.num_classes),
                              tf.stop_gradient(final_mask))
    cls_pred = tf.boolean_mask(cls_pred, tf.stop_gradient(final_mask))
    location_pred = tf.boolean_mask(location_pred,
                                    tf.stop_gradient(positive_mask))
    gtargets = tf.boolean_mask(gtargets, tf.stop_gradient(positive_mask))
    predictions = {
        'classes':
        tf.argmax(cls_pred, axis=-1),
        'probabilities':
        tf.reduce_max(tf.nn.softmax(cls_pred, name='softmax_tensor'), axis=-1),
        'bboxes_predict':
        tf.reshape(bboxes_pred, [-1, 4])
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    cross_entropy = tf.cond(
        n_positives > 0., lambda: tf.losses.sparse_softmax_cross_entropy(
            labels=glabels, logits=cls_pred), lambda: 0.)
    #cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=glabels, logits=cls_pred)

    # Create a tensor named cross_entropy for logging purposes.
    tf.identity(cross_entropy, name='cross_entropy_loss')
    tf.summary.scalar('cross_entropy_loss', cross_entropy)

    loc_loss = tf.cond(
        n_positives > 0., lambda: modified_smooth_l1(
            location_pred, tf.stop_gradient(gtargets), sigma=1.),
        lambda: tf.zeros_like(location_pred))
    #loc_loss = modified_smooth_l1(location_pred, tf.stop_gradient(gtargets))
    loc_loss = tf.reduce_mean(tf.reduce_sum(loc_loss, axis=-1))
    loc_loss = tf.identity(loc_loss, name='location_loss')
    tf.summary.scalar('location_loss', loc_loss)
    tf.losses.add_loss(loc_loss)

    # Add weight decay to the loss. We exclude the batch norm variables because
    # doing so leads to a small improvement in accuracy.
    loss = 1.2 * (cross_entropy +
                  loc_loss) + params['weight_decay'] * tf.add_n([
                      tf.nn.l2_loss(v) for v in tf.trainable_variables()
                      if 'batch_normalization' not in v.name
                  ])
    total_loss = tf.identity(loss, name='total_loss')

    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_or_create_global_step()

        lr_values = [
            params['learning_rate'] * decay
            for decay in params['lr_decay_factors']
        ]
        learning_rate = tf.train.piecewise_constant(
            tf.cast(global_step, tf.int32),
            [int(_) for _ in params['decay_boundaries']], lr_values)
        truncated_learning_rate = tf.maximum(
            learning_rate,
            tf.constant(params['end_learning_rate'],
                        dtype=learning_rate.dtype))
        # Create a tensor named learning_rate for logging purposes.
        tf.identity(truncated_learning_rate, name='learning_rate')
        tf.summary.scalar('learning_rate', truncated_learning_rate)

        optimizer = tf.train.MomentumOptimizer(
            learning_rate=truncated_learning_rate, momentum=params['momentum'])

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step)
    else:
        train_op = None

    cls_accuracy = tf.metrics.accuracy(glabels, predictions['classes'])
    metrics = {'cls_accuracy': cls_accuracy}

    # Create a tensor named train_accuracy for logging purposes.
    tf.identity(cls_accuracy[1], name='cls_accuracy')
    tf.summary.scalar('cls_accuracy', cls_accuracy[1])

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=metrics,
        scaffold=tf.train.Scaffold(
            init_fn=train_helper.get_init_fn_for_scaffold(FLAGS)))
Example #2
0
def xdet_model_fn(features, labels, mode, params):
    """Our model_fn for ResNet to be used with our Estimator."""
    num_anchors_list = labels['num_anchors_list']
    num_feature_layers = len(num_anchors_list)

    shape = labels['targets'][-1]
    if mode != tf.estimator.ModeKeys.TRAIN:
        org_image = labels['targets'][-2]
        isdifficult = labels['targets'][-3]
        bbox_img = labels['targets'][-4]
        gbboxes_raw = labels['targets'][-5]
        glabels_raw = labels['targets'][-6]

    glabels = labels['targets'][:num_feature_layers][0]
    gtargets = labels['targets'][num_feature_layers : 2 * num_feature_layers][0]
    gscores = labels['targets'][2 * num_feature_layers : 3 * num_feature_layers][0]

    with tf.variable_scope(params['model_scope'], default_name = None, values = [features], reuse=tf.AUTO_REUSE):
        backbone = xdet_body_v2.xdet_resnet_v2(params['resnet_size'], params['data_format'])
        body_cls_output, body_regress_output = backbone(inputs=features, is_training=(mode == tf.estimator.ModeKeys.TRAIN))

        cls_pred, location_pred = xdet_body_v2.xdet_head(body_cls_output, body_regress_output, params['num_classes'], num_anchors_list[0], (mode == tf.estimator.ModeKeys.TRAIN), data_format=params['data_format'])

    if params['data_format'] == 'channels_first':
        cls_pred = tf.transpose(cls_pred, [0, 2, 3, 1])
        location_pred = tf.transpose(location_pred, [0, 2, 3, 1])
        #org_image = tf.transpose(org_image, [0, 2, 3, 1])
    # batch size is 1
    shape = tf.squeeze(shape, axis = 0)
    glabels = tf.squeeze(glabels, axis = 0)
    gtargets = tf.squeeze(gtargets, axis = 0)
    gscores = tf.squeeze(gscores, axis = 0)
    cls_pred = tf.squeeze(cls_pred, axis = 0)
    location_pred = tf.squeeze(location_pred, axis = 0)
    if mode != tf.estimator.ModeKeys.TRAIN:
        org_image = tf.squeeze(org_image, axis = 0)
        isdifficult = tf.squeeze(isdifficult, axis = 0)
        gbboxes_raw = tf.squeeze(gbboxes_raw, axis = 0)
        glabels_raw = tf.squeeze(glabels_raw, axis = 0)
        bbox_img = tf.squeeze(bbox_img, axis = 0)

    bboxes_pred = labels['decode_fn'](location_pred)#(tf.reshape(location_pred, location_pred.get_shape().as_list()[:-1] + [-1, 4]))#(location_pred)#

    eval_ops, save_image_op = bboxes_eval(org_image, shape, bbox_img, cls_pred, bboxes_pred, glabels_raw, gbboxes_raw, isdifficult, params['num_classes'])
    _ = tf.identity(save_image_op, name='save_image_with_bboxes_op')

    cls_pred = tf.reshape(cls_pred, [-1, params['num_classes']])
    location_pred = tf.reshape(location_pred, [-1, 4])
    glabels = tf.reshape(glabels, [-1])
    gscores = tf.reshape(gscores, [-1])
    gtargets = tf.reshape(gtargets, [-1, 4])

    # raw mask for positive > 0.5, and for negetive < 0.3
    # each positive examples has one label
    positive_mask = glabels > 0#tf.logical_and(glabels > 0, gscores > params['match_threshold'])
    fpositive_mask = tf.cast(positive_mask, tf.float32)
    n_positives = tf.reduce_sum(fpositive_mask)

    batch_glabels = tf.reshape(glabels, [tf.shape(features)[0], -1])
    batch_n_positives = tf.count_nonzero(batch_glabels, -1)

    batch_negtive_mask = tf.equal(batch_glabels, 0)
    batch_n_negtives = tf.count_nonzero(batch_negtive_mask, -1)

    batch_n_neg_select = tf.cast(params['negative_ratio'] * tf.cast(batch_n_positives, tf.float32), tf.int32)
    batch_n_neg_select = tf.minimum(batch_n_neg_select, tf.cast(batch_n_negtives, tf.int32))

    # hard negative mining for classification
    predictions_for_bg = tf.nn.softmax(tf.reshape(cls_pred, [tf.shape(features)[0], -1, params['num_classes']]))[:, :, 0]
    prob_for_negtives = tf.where(batch_negtive_mask,
                           0. - predictions_for_bg,
                           # ignore all the positives
                           0. - tf.ones_like(predictions_for_bg))
    topk_prob_for_bg, _ = tf.nn.top_k(prob_for_negtives, k=tf.shape(prob_for_negtives)[1])
    score_at_k = tf.gather_nd(topk_prob_for_bg, tf.stack([tf.range(tf.shape(features)[0]), batch_n_neg_select - 1], axis=-1))

    selected_neg_mask = prob_for_negtives >= tf.expand_dims(score_at_k, axis=-1)

    negtive_mask = tf.reshape(tf.logical_and(batch_negtive_mask, selected_neg_mask), [-1])#tf.logical_and(tf.equal(glabels, 0), gscores > 0.)
    #negtive_mask = tf.logical_and(tf.logical_and(tf.logical_not(positive_mask), gscores < params['neg_threshold']), gscores > 0.)
    #negtive_mask = tf.logical_and(gscores < params['neg_threshold'], tf.logical_not(positive_mask))

    # # random select negtive examples for classification
    # selected_neg_mask = tf.random_uniform(tf.shape(gscores), minval=0, maxval=1.) < tf.where(
    #                                                                                     tf.greater(n_negtives, 0),
    #                                                                                     tf.divide(tf.cast(n_neg_to_select, tf.float32), n_negtives),
    #                                                                                     tf.zeros_like(tf.cast(n_neg_to_select, tf.float32)),
    #                                                                                     name='rand_select_negtive')

    # include both selected negtive and all positive examples
    final_mask = tf.stop_gradient(tf.logical_or(negtive_mask, positive_mask))
    total_examples = tf.reduce_sum(tf.cast(final_mask, tf.float32))

    # add mask for glabels and cls_pred here
    glabels = tf.boolean_mask(tf.clip_by_value(glabels, 0, FLAGS.num_classes), tf.stop_gradient(final_mask))
    cls_pred = tf.boolean_mask(cls_pred, tf.stop_gradient(final_mask))
    location_pred = tf.boolean_mask(location_pred, tf.stop_gradient(positive_mask))
    gtargets = tf.boolean_mask(gtargets, tf.stop_gradient(positive_mask))

    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    cross_entropy = tf.cond(n_positives > 0., lambda: tf.losses.sparse_softmax_cross_entropy(labels=glabels, logits=cls_pred), lambda: 0.)
    #cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=glabels, logits=cls_pred)

    # Create a tensor named cross_entropy for logging purposes.
    tf.identity(cross_entropy, name='cross_entropy_loss')
    tf.summary.scalar('cross_entropy_loss', cross_entropy)

    loc_loss = tf.cond(n_positives > 0., lambda: modified_smooth_l1(location_pred, tf.stop_gradient(gtargets), sigma=1.), lambda: tf.zeros_like(location_pred))
    #loc_loss = modified_smooth_l1(location_pred, tf.stop_gradient(gtargets))
    loc_loss = tf.reduce_mean(tf.reduce_sum(loc_loss, axis=-1))
    loc_loss = tf.identity(loc_loss, name='location_loss')
    tf.summary.scalar('location_loss', loc_loss)
    tf.losses.add_loss(loc_loss)

    with tf.control_dependencies([save_image_op]):
        # Add weight decay to the loss. We exclude the batch norm variables because
        # doing so leads to a small improvement in accuracy.
        loss = 1.2 * (cross_entropy + loc_loss) + params['weight_decay'] * tf.add_n(
          [tf.nn.l2_loss(v) for v in tf.trainable_variables()
           if 'batch_normalization' not in v.name])
        total_loss = tf.identity(loss, name='total_loss')

    predictions = {
        'classes': tf.argmax(cls_pred, axis=-1),
        'probabilities': tf.reduce_max(tf.nn.softmax(cls_pred, name='softmax_tensor'), axis=-1),
        'bboxes_predict': tf.reshape(bboxes_pred, [-1, 4]),
        'saved_image_index': save_image_op }

    summary_hook = tf.train.SummarySaverHook(
                        save_secs=FLAGS.save_summary_steps,
                        output_dir=FLAGS.model_dir,
                        summary_op=tf.summary.merge_all())

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions,
                        evaluation_hooks = [summary_hook],
                        loss=loss, eval_metric_ops=eval_ops)#=eval_ops)
    else:
        raise ValueError('This script only support predict mode!')