def main(_):
    """Run the Light-Head detector on the demo image and save the drawing.

    Builds the single-image inference graph (preprocessing, Xception
    backbone, RPN, PS-RoI-align head, per-class selection and NMS), restores
    the weights from ``FLAGS.checkpoint_path``, runs the graph on
    ``./demo/test.jpg`` and writes the image with the detected boxes drawn on
    it to ``<FLAGS.debug_dir>/test_out.jpg``.

    Args:
        _: Unused positional argument (presumably ``argv`` handed over by the
            application runner -- confirm against the entry point).
    """
    with tf.Graph().as_default():
        channels_first = FLAGS.data_format == 'channels_first'

        # One HxWx3 uint8 image per run, plus its original (height, width).
        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        shape_input = tf.placeholder(tf.int32, shape=(2, ))

        preprocessed = common_preprocessing.light_head_preprocess_for_test(
            image_input, [FLAGS.train_image_size] * 2,
            data_format='NCHW' if channels_first else 'NHWC')
        # The network consumes a batch, so add a leading batch axis of size 1.
        net_input = tf.expand_dims(preprocessed, axis=0)

        anchor_creator = anchor_manipulator.AnchorCreator(
            [FLAGS.train_image_size] * 2,
            layers_shapes=[(30, 30)],
            anchor_scales=[[0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]],
            extra_anchor_scales=[[0.1]],
            anchor_ratios=[[1., 2., .5]],
            layer_steps=[16])
        all_anchors, num_anchors_list = anchor_creator.get_all_anchors()

        anchor_codec = anchor_manipulator.AnchorEncoder(
            all_anchors,
            num_classes=FLAGS.num_classes,
            allowed_borders=None,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[1., 1., 1., 1.])

        def roi_pooling(input_, bboxes_, grid_width_, grid_height_):
            # Position-sensitive RoI align with the configured pooling method.
            return ps_roi_align(input_, bboxes_, grid_width_, grid_height_,
                                pool_method)

        with tf.variable_scope(FLAGS.model_scope, default_name=None,
                               values=[net_input], reuse=tf.AUTO_REUSE):
            rpn_feat_map, backbone_feat = xception_body.XceptionBody(
                net_input, FLAGS.num_classes, is_training=False,
                data_format=FLAGS.data_format)
            rpn_cls_score, rpn_bbox_pred = xception_body.get_rpn(
                rpn_feat_map, num_anchors_list[0], False, FLAGS.data_format,
                'rpn_head')
            large_sep_feature = xception_body.large_sep_kernel(
                backbone_feat, 256, 10 * 7 * 7, False, FLAGS.data_format,
                'large_sep_feature')

            if channels_first:
                # Move channels last so the reshapes below group per anchor.
                rpn_cls_score = tf.transpose(rpn_cls_score, [0, 2, 3, 1])
                rpn_bbox_pred = tf.transpose(rpn_bbox_pred, [0, 2, 3, 1])

            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            # Last softmax column is used as the "object" probability.
            objectness = tf.nn.softmax(rpn_cls_score)[:, -1]
            objectness = tf.reshape(objectness, [1, -1])
            rpn_offsets = tf.reshape(rpn_bbox_pred, [1, -1, 4])

            rpn_boxes = anchor_codec.decode_all_anchors(
                [rpn_offsets], squeeze_inner=True)[0]

            proposals_bboxes = xception_body.get_proposals(
                objectness, rpn_boxes, None,
                FLAGS.rpn_pre_nms_top_n, FLAGS.rpn_post_nms_top_n,
                FLAGS.rpn_nms_thres, FLAGS.rpn_min_size, False,
                FLAGS.data_format)

            cls_score, bboxes_reg = xception_body.get_head(
                large_sep_feature, roi_pooling, 7, 7, None, proposals_bboxes,
                FLAGS.num_classes, False, False, 0, FLAGS.data_format,
                'final_head')

            head_boxes = anchor_codec.ext_decode_rois(
                proposals_bboxes, bboxes_reg,
                head_prior_scaling=[1., 1., 1., 1.])

            class_probs = tf.reshape(cls_score, [-1, FLAGS.num_classes])
            class_probs = tf.nn.softmax(class_probs)
            head_boxes = tf.reshape(head_boxes, [-1, 4])

        with tf.device('/device:CPU:0'):
            picked_scores, picked_boxes = eval_helper.tf_bboxes_select(
                [class_probs], [head_boxes], FLAGS.select_threshold,
                FLAGS.num_classes, scope='xdet_v2_select')
            picked_boxes = eval_helper.bboxes_clip(
                tf.constant([0., 0., 1., 1.]), picked_boxes)
            picked_scores, picked_boxes = eval_helper.filter_boxes(
                picked_scores, picked_boxes, 0.03, shape_input,
                [FLAGS.train_image_size] * 2,
                keep_top_k=FLAGS.nms_topk * 2)
            # Resize bboxes to original image shape.
            picked_boxes = eval_helper.bboxes_resize(
                tf.constant([0., 0., 1., 1.]), picked_boxes)
            picked_scores, picked_boxes = eval_helper.bboxes_sort(
                picked_scores, picked_boxes, top_k=FLAGS.nms_topk * 2)
            # Apply NMS algorithm.
            picked_scores, picked_boxes = eval_helper.bboxes_nms_batch(
                picked_scores, picked_boxes,
                nms_threshold=FLAGS.nms_threshold,
                keep_top_k=FLAGS.nms_topk)

        # Flatten the per-class dictionaries into parallel 1-D/2-D tensors;
        # each score gets the key of the dictionary entry it came from.
        per_class_labels = [
            tf.ones_like(class_scores, tf.int32) * class_id
            for class_id, class_scores in picked_scores.items()
        ]
        all_labels = tf.concat(per_class_labels, axis=0)
        all_scores = tf.concat(list(picked_scores.values()), axis=0)
        all_bboxes = tf.concat(list(picked_boxes.values()), axis=0)

        saver = tf.train.Saver()
        with tf.Session() as session:
            session.run(tf.global_variables_initializer())
            saver.restore(session, FLAGS.checkpoint_path)

            demo_image = imread('./demo/test.jpg')
            fetches = [all_labels, all_scores, all_bboxes]
            feeds = {
                image_input: demo_image,
                shape_input: demo_image.shape[:-1],
            }
            labels_, scores_, bboxes_ = session.run(fetches, feed_dict=feeds)

            annotated = draw_toolbox.bboxes_draw_on_img(
                demo_image, labels_, scores_, bboxes_, thickness=2)
            imsave(os.path.join(FLAGS.debug_dir, 'test_out.jpg'), annotated)
def lighr_head_model_fn(features, labels, mode, params):
    """Estimator ``model_fn`` that builds the Light-Head training graph.

    The graph consists of the Xception backbone, the RPN (with fg/bg anchor
    sampling and its classification/location losses), proposal generation,
    and the detection head whose loss is produced by
    ``xception_body.get_head``. The returned loss is RPN loss + head loss +
    L2 weight decay over non-batch-norm trainable variables.

    Args:
        features: Batched input images fed to ``xception_body.XceptionBody``.
        labels: Dict read with the keys ``'num_anchors_list'``, ``'targets'``
            (per-layer labels, then regression targets, then scores),
            ``'rpn_decode_fn'`` and ``'rpn_encode_fn'``.
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Dict of hyper-parameters. Keys read here: ``model_scope``,
            ``num_classes``, ``data_format``, ``batch_size``,
            ``rpn_anchors_per_image``, ``rpn_fg_ratio``,
            ``rpn_pre_nms_top_n``, ``rpn_post_nms_top_n``, ``rpn_nms_thres``,
            ``rpn_min_size``, ``using_ohem``, ``ohem_roi_one_image``,
            ``fg_ratio``, ``weight_decay``, ``learning_rate``,
            ``lr_decay_factors``, ``decay_boundaries``,
            ``end_learning_rate`` and ``momentum``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` carrying the total loss and, in
        TRAIN mode, the momentum-SGD train op.
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    # Total number of anchors sampled for the RPN loss over the whole batch.
    rois_per_batch = params['batch_size'] * params['rpn_anchors_per_image']

    num_anchors_list = labels['num_anchors_list']
    num_feature_layers = len(num_anchors_list)

    # 'targets' is laid out as [labels..., regression targets..., scores...];
    # only the first feature layer of each group is used.
    targets = labels['targets']
    glabels = targets[:num_feature_layers][0]
    gtargets = targets[num_feature_layers:2 * num_feature_layers][0]
    gscores = targets[2 * num_feature_layers:3 * num_feature_layers][0]

    def roi_pooling(input_, bboxes_, grid_width_, grid_height_):
        # Position-sensitive RoI align with the configured pooling method.
        return ps_roi_align(input_, bboxes_, grid_width_, grid_height_,
                            pool_method)

    # NOTE(review): every op created inside this block is name-scoped under
    # params['model_scope'] (e.g. '<model_scope>/rpn_loss'); confirm that any
    # logging hook refers to the scoped tensor names.
    with tf.variable_scope(params['model_scope'], default_name=None,
                           values=[features], reuse=tf.AUTO_REUSE):
        rpn_feat_map, backbone_feat = xception_body.XceptionBody(
            features, params['num_classes'], is_training=is_training,
            data_format=params['data_format'])
        rpn_cls_score, rpn_bbox_pred = xception_body.get_rpn(
            rpn_feat_map, num_anchors_list[0], is_training,
            params['data_format'], 'rpn_head')
        large_sep_feature = xception_body.large_sep_kernel(
            backbone_feat, 256, 10 * 7 * 7, is_training,
            params['data_format'], 'large_sep_feature')

        if params['data_format'] == 'channels_first':
            # Move channels last so the reshapes below group per anchor.
            rpn_cls_score = tf.transpose(rpn_cls_score, [0, 2, 3, 1])
            rpn_bbox_pred = tf.transpose(rpn_bbox_pred, [0, 2, 3, 1])

        rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
        # Last softmax column is used as the "object" probability.
        rpn_object_score = tf.nn.softmax(rpn_cls_score)[:, -1]
        rpn_object_score = tf.reshape(rpn_object_score,
                                      [params['batch_size'], -1])
        rpn_location_pred = tf.reshape(rpn_bbox_pred,
                                       [params['batch_size'], -1, 4])
        rpn_bboxes_pred = labels['rpn_decode_fn'](rpn_location_pred)

        # ------------------------------ RPN loss ------------------------------
        cls_pred = tf.reshape(rpn_cls_score, [-1, 2])
        location_pred = tf.reshape(rpn_bbox_pred, [-1, 4])
        glabels = tf.reshape(glabels, [-1])
        gscores = tf.reshape(gscores, [-1])
        gtargets = tf.reshape(gtargets, [-1, 4])

        expected_num_fg_rois = tf.cast(
            tf.round(
                tf.cast(rois_per_batch, tf.float32) * params['rpn_fg_ratio']),
            tf.int32)

        def select_samples(cls_pred, location_pred, glabels, gscores,
                           gtargets):
            """Sample exactly ``rois_per_batch`` anchors for the RPN loss.

            Foreground (label > 0) and background (label == 0) anchors are
            each randomly down-sampled to their quota; if the union is still
            short it is padded by repeating already selected anchors.
            Returns the gathered predictions, binary int64 labels, scores
            and regression targets.
            """

            def upsample_impl(now_count, need_count):
                # Pad up to `need_count` by repeating the whole index range
                # as often as it fits and adding a random partial remainder.
                left_count = need_count - now_count
                remainder = tf.random_shuffle(
                    tf.range(now_count))[:tf.floormod(left_count, now_count)]
                return tf.concat([
                    tf.tile(tf.range(now_count),
                            [tf.floor_div(left_count, now_count) + 1]),
                    remainder
                ], axis=0)

            def downsample_impl(now_count, need_count):
                # Random subset without replacement: shuffle, then truncate.
                return tf.random_shuffle(tf.range(now_count))[:need_count]

            positive_mask = glabels > 0
            positive_indices = tf.squeeze(tf.where(positive_mask), axis=-1)
            n_positives = tf.shape(positive_indices)[0]
            # Either take all positives or down-sample them to the quota, so
            # min(n_positives, expected_num_fg_rois) foregrounds are kept.
            fg_select_indices = tf.cond(
                n_positives < expected_num_fg_rois,
                lambda: positive_indices,
                lambda: tf.gather(
                    positive_indices,
                    downsample_impl(n_positives, expected_num_fg_rois)))

            # Only anchors labelled exactly 0 are background; negative
            # labels are never selected.
            negative_mask = tf.equal(glabels, 0)
            negative_indices = tf.squeeze(tf.where(negative_mask), axis=-1)
            n_negatives = tf.shape(negative_indices)[0]
            expected_num_bg_rois = rois_per_batch - tf.minimum(
                n_positives, expected_num_fg_rois)
            # Either take all negatives or down-sample them to the quota, so
            # min(n_negatives, expected_num_bg_rois) backgrounds are kept.
            bg_select_indices = tf.cond(
                n_negatives < expected_num_bg_rois,
                lambda: negative_indices,
                lambda: tf.gather(
                    negative_indices,
                    downsample_impl(n_negatives, expected_num_bg_rois)))

            keep_indices = tf.concat([fg_select_indices, bg_select_indices],
                                     axis=0)
            n_keeps = tf.shape(keep_indices)[0]
            # n_keeps is at most rois_per_batch here; pad if it is short.
            final_keep_indices = tf.cond(
                n_keeps < rois_per_batch,
                lambda: tf.gather(keep_indices,
                                  upsample_impl(n_keeps, rois_per_batch)),
                lambda: keep_indices)

            sampled_cls_pred = tf.gather(cls_pred, final_keep_indices)
            sampled_location_pred = tf.gather(location_pred,
                                              final_keep_indices)
            # Collapse class ids to a binary object / not-object label.
            sampled_labels = tf.cast(
                tf.gather(
                    tf.clip_by_value(glabels, 0, params['num_classes']),
                    final_keep_indices) > 0, tf.int64)
            sampled_scores = tf.gather(gscores, final_keep_indices)
            sampled_targets = tf.gather(gtargets, final_keep_indices)
            return (sampled_cls_pred, sampled_location_pred, sampled_labels,
                    sampled_scores, sampled_targets)

        cls_pred, location_pred, glabels, gscores, gtargets = select_samples(
            cls_pred, location_pred, glabels, gscores, gtargets)

        # RPN classification loss over the sampled anchors.
        rpn_cross_entropy = tf.losses.sparse_softmax_cross_entropy(
            labels=glabels, logits=cls_pred)
        # Create a named tensor for logging purposes.
        rpn_cross_entropy = tf.identity(rpn_cross_entropy,
                                        name='rpn_cross_entropy_loss')
        tf.summary.scalar('rpn_cross_entropy_loss', rpn_cross_entropy)

        # RPN location loss: only sampled foreground anchors contribute.
        total_positive_mask = (glabels > 0)
        gtargets = tf.boolean_mask(gtargets,
                                   tf.stop_gradient(total_positive_mask))
        location_pred = tf.boolean_mask(
            location_pred, tf.stop_gradient(total_positive_mask))
        rpn_l1_distance = modified_smooth_l1(location_pred, gtargets,
                                             sigma=1.)
        per_anchor_loc_loss = tf.reduce_sum(rpn_l1_distance, axis=-1)
        # A plain reduce_mean is NaN when the batch has no foreground anchor
        # (mean over an empty tensor); dividing the sum by max(count, 1)
        # gives the same mean otherwise and 0 for the empty case.
        num_loc_terms = tf.cast(tf.size(per_anchor_loc_loss),
                                per_anchor_loc_loss.dtype)
        rpn_loc_loss = (tf.reduce_sum(per_anchor_loc_loss) /
                        tf.maximum(num_loc_terms, 1.)) / params['rpn_fg_ratio']
        rpn_loc_loss = tf.identity(rpn_loc_loss, name='rpn_location_loss')
        tf.summary.scalar('rpn_location_loss', rpn_loc_loss)
        tf.losses.add_loss(rpn_loc_loss)

        rpn_loss = tf.identity(rpn_loc_loss + rpn_cross_entropy,
                               name='rpn_loss')
        tf.summary.scalar('rpn_loss', rpn_loss)

        # ------------------------- proposals and head -------------------------
        (proposals_bboxes, proposals_targets, proposals_labels,
         proposals_scores) = xception_body.get_proposals(
             rpn_object_score, rpn_bboxes_pred, labels['rpn_encode_fn'],
             params['rpn_pre_nms_top_n'], params['rpn_post_nms_top_n'],
             params['rpn_nms_thres'], params['rpn_min_size'], is_training,
             params['data_format'])

        def head_loss_func(cls_score, bboxes_reg, select_indices,
                           proposals_targets, proposals_labels):
            """Return the un-reduced per-RoI head loss.

            When ``select_indices`` is given, targets and labels are first
            gathered along axis 1 with it. Scalar summaries are only
            emitted for the pass whose RoIs are the ones trained on: always
            without OHEM, and only the index-selected pass with OHEM.
            """
            if select_indices is not None:
                proposals_targets = tf.gather(proposals_targets,
                                              select_indices, axis=1)
                proposals_labels = tf.gather(proposals_labels,
                                             select_indices, axis=1)

            head_cross_entropy = (
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=proposals_labels, logits=cls_score))

            # Location loss is masked (not filtered) so it keeps the same
            # per-RoI shape as the classification loss.
            total_positive_mask = tf.cast((proposals_labels > 0), tf.float32)
            head_loc_loss = modified_smooth_l1(bboxes_reg, proposals_targets,
                                               sigma=1.)
            head_loc_loss = tf.reduce_sum(head_loc_loss,
                                          axis=-1) * total_positive_mask

            if (not params['using_ohem']) or (select_indices is not None):
                head_cross_entropy_loss = tf.reduce_mean(head_cross_entropy)
                head_cross_entropy_loss = tf.identity(
                    head_cross_entropy_loss, name='head_cross_entropy_loss')
                tf.summary.scalar('head_cross_entropy_loss',
                                  head_cross_entropy_loss)

                head_location_loss = tf.reduce_mean(
                    head_loc_loss) / params['fg_ratio']
                head_location_loss = tf.identity(head_location_loss,
                                                 name='head_location_loss')
                tf.summary.scalar('head_location_loss', head_location_loss)

            return head_cross_entropy + head_loc_loss / params['fg_ratio']

        head_loss = xception_body.get_head(
            large_sep_feature, roi_pooling, 7, 7,
            lambda cls, bbox, indices: head_loss_func(
                cls, bbox, indices, proposals_targets, proposals_labels),
            proposals_bboxes, params['num_classes'], is_training,
            params['using_ohem'], params['ohem_roi_one_image'],
            params['data_format'], 'final_head')

        # Create a named tensor for logging purposes.
        head_loss = tf.identity(head_loss, name='head_loss')
        tf.summary.scalar('head_loss', head_loss)
        tf.losses.add_loss(head_loss)

    # NOTE(review): this branch is only reached after the label-dependent
    # loss graph above has been built -- confirm PREDICT is actually usable
    # with this model_fn.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=None)

    # Add weight decay to the loss. We exclude the batch norm variables
    # because doing so leads to a small improvement in accuracy.
    decayed_variables = [
        v for v in tf.trainable_variables()
        if 'batch_normalization' not in v.name and '_bn' not in v.name
    ]
    loss = rpn_cross_entropy + rpn_loc_loss + head_loss + params[
        'weight_decay'] * tf.add_n(
            [tf.nn.l2_loss(v) for v in decayed_variables])
    # Create a tensor named total_loss for logging purposes.
    tf.identity(loss, name='total_loss')

    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_or_create_global_step()

        lr_values = [
            params['learning_rate'] * decay
            for decay in params['lr_decay_factors']
        ]
        learning_rate = tf.train.piecewise_constant(
            tf.cast(global_step, tf.int32),
            [int(boundary) for boundary in params['decay_boundaries']],
            lr_values)
        # Never let the schedule drop below the configured floor.
        truncated_learning_rate = tf.maximum(
            learning_rate,
            tf.constant(params['end_learning_rate'],
                        dtype=learning_rate.dtype))
        # Create a tensor named learning_rate for logging purposes.
        tf.identity(truncated_learning_rate, name='learning_rate')
        tf.summary.scalar('learning_rate', truncated_learning_rate)

        optimizer = tf.train.MomentumOptimizer(
            learning_rate=truncated_learning_rate,
            momentum=params['momentum'])

        # Batch norm requires update_ops to be added as a train_op
        # dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step)
    else:
        train_op = None

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=None,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=None,
        scaffold=tf.train.Scaffold(
            init_fn=train_helper.get_init_fn_for_scaffold(FLAGS)))
def lighr_head_model_fn(features, labels, mode, params):
    """Estimator ``model_fn`` that builds the Light-Head evaluation graph.

    Runs the Xception backbone, RPN, proposal generation and detection head
    in inference mode on a single image (the reshapes and squeezes below fix
    the batch dimension to 1), decodes the head boxes and wires up the
    detection metrics and the image-saving op from ``bboxes_eval``.

    Args:
        features: Input image batch fed to ``xception_body.XceptionBody``.
        labels: Dict read with the keys ``'num_anchors_list'``,
            ``'targets'``, ``'rpn_decode_fn'`` and ``'head_decode_fn'``. The
            last six entries of ``'targets'`` are, from the end: image
            shape, original image, difficult flags, image bbox, raw
            ground-truth boxes and raw ground-truth labels.
        mode: Must be ``tf.estimator.ModeKeys.EVAL``.
        params: Dict of hyper-parameters. Keys read here: ``model_scope``,
            ``num_classes``, ``data_format``, ``rpn_pre_nms_top_n``,
            ``rpn_post_nms_top_n``, ``rpn_nms_thres``, ``rpn_min_size`` and
            ``weight_decay``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for evaluation.

    Raises:
        ValueError: If ``mode`` is not ``tf.estimator.ModeKeys.EVAL``.
    """
    # Fail fast: only EVAL is supported. Checking before any graph
    # construction gives TRAIN/PREDICT callers a clear error instead of a
    # failure somewhere in the middle of the graph-building code.
    if mode != tf.estimator.ModeKeys.EVAL:
        raise ValueError('This script only supports eval mode!')
    is_training = False

    num_anchors_list = labels['num_anchors_list']
    num_feature_layers = len(num_anchors_list)

    targets = labels['targets']
    shape = targets[-1]
    org_image = targets[-2]
    isdifficult = targets[-3]
    bbox_img = targets[-4]
    gbboxes_raw = targets[-5]
    glabels_raw = targets[-6]

    # 'targets' starts with [labels..., regression targets..., scores...];
    # only the first feature layer of each group is used.
    glabels = targets[:num_feature_layers][0]
    gtargets = targets[num_feature_layers:2 * num_feature_layers][0]
    gscores = targets[2 * num_feature_layers:3 * num_feature_layers][0]

    def roi_pooling(input_, bboxes_, grid_width_, grid_height_):
        # Position-sensitive RoI align with the configured pooling method.
        return ps_roi_align(input_, bboxes_, grid_width_, grid_height_,
                            pool_method)

    with tf.variable_scope(params['model_scope'], default_name=None,
                           values=[features], reuse=tf.AUTO_REUSE):
        rpn_feat_map, backbone_feat = xception_body.XceptionBody(
            features, params['num_classes'], is_training=is_training,
            data_format=params['data_format'])
        rpn_cls_score, rpn_bbox_pred = xception_body.get_rpn(
            rpn_feat_map, num_anchors_list[0], is_training,
            params['data_format'], 'rpn_head')
        large_sep_feature = xception_body.large_sep_kernel(
            backbone_feat, 256, 10 * 7 * 7, is_training,
            params['data_format'], 'large_sep_feature')

        if params['data_format'] == 'channels_first':
            # Move channels last so the reshapes below group per anchor.
            rpn_cls_score = tf.transpose(rpn_cls_score, [0, 2, 3, 1])
            rpn_bbox_pred = tf.transpose(rpn_bbox_pred, [0, 2, 3, 1])

        rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
        # Last softmax column is used as the "object" probability.
        rpn_object_score = tf.nn.softmax(rpn_cls_score)[:, -1]
        rpn_object_score = tf.reshape(rpn_object_score, [1, -1])
        rpn_location_pred = tf.reshape(rpn_bbox_pred, [1, -1, 4])
        rpn_bboxes_pred = labels['rpn_decode_fn'](rpn_location_pred)

        proposals_bboxes = xception_body.get_proposals(
            rpn_object_score, rpn_bboxes_pred, None,
            params['rpn_pre_nms_top_n'], params['rpn_post_nms_top_n'],
            params['rpn_nms_thres'], params['rpn_min_size'], is_training,
            params['data_format'])

        cls_score, bboxes_reg = xception_body.get_head(
            large_sep_feature, roi_pooling, 7, 7, None, proposals_bboxes,
            params['num_classes'], is_training, False, 0,
            params['data_format'], 'final_head')

        head_bboxes_pred = labels['head_decode_fn'](proposals_bboxes,
                                                    bboxes_reg)
        head_cls_score = tf.reshape(cls_score, [-1, params['num_classes']])
        head_cls_score = tf.nn.softmax(head_cls_score)
        head_bboxes_pred = tf.reshape(head_bboxes_pred, [-1, 4])

    # Drop the leading batch axis of every per-image tensor.
    shape = tf.squeeze(shape, axis=0)
    # The three anchor-target tensors are not consumed below; the squeeze
    # ops are kept so that graph construction matches the previous version.
    glabels = tf.squeeze(glabels, axis=0)
    gtargets = tf.squeeze(gtargets, axis=0)
    gscores = tf.squeeze(gscores, axis=0)
    org_image = tf.squeeze(org_image, axis=0)
    isdifficult = tf.squeeze(isdifficult, axis=0)
    gbboxes_raw = tf.squeeze(gbboxes_raw, axis=0)
    glabels_raw = tf.squeeze(glabels_raw, axis=0)
    bbox_img = tf.squeeze(bbox_img, axis=0)

    eval_ops, save_image_op = bboxes_eval(
        org_image, shape, bbox_img, cls_score, head_bboxes_pred, glabels_raw,
        gbboxes_raw, isdifficult, params['num_classes'])
    # Expose the op under a fixed name so it can be looked up by name.
    _ = tf.identity(save_image_op, name='save_image_with_bboxes_op')

    # The loss is made to depend on the image-saving op, so fetching the
    # loss also runs that op.
    with tf.control_dependencies([save_image_op]):
        weight_decay_loss = params['weight_decay'] * tf.add_n([
            tf.nn.l2_loss(v) for v in tf.trainable_variables()
            if 'batch_normalization' not in v.name
        ])

    predictions = {
        'classes': tf.argmax(head_cls_score, axis=-1),
        'probabilities': tf.reduce_max(head_cls_score, axis=-1),
        'bboxes_predict': head_bboxes_pred,
        'saved_image_index': save_image_op
    }

    # NOTE(review): a flag named "..._steps" is passed as `save_secs`; left
    # unchanged because switching to `save_steps` would alter how often
    # summaries are written during evaluation -- confirm the intent.
    summary_hook = tf.train.SummarySaverHook(
        save_secs=FLAGS.save_summary_steps,
        output_dir=FLAGS.model_dir,
        summary_op=tf.summary.merge_all())

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        evaluation_hooks=[summary_hook],
        loss=weight_decay_loss,
        eval_metric_ops=eval_ops)