def test_exclude_low_score_box():
    """Check that ExcludeLowScoreBox keeps only the boxes at or above the threshold.

    The input holds three batch elements with two six-value rows each. The last
    value of every row (0.1 .. 0.6, presumably the box score) is compared with
    the 0.35 threshold: the first element is expected to lose both rows, the
    second to keep one and the third to keep both.
    """
    threshold = 0.35

    inputs = np.array([
        [
            [10, 11, 12, 13, 1, 0.1],
            [20, 21, 22, 23, 2, 0.2],
        ],
        [
            [30, 31, 32, 33, 3, 0.3],
            [40, 41, 42, 43, 4, 0.4],
        ],
        [
            [50, 51, 52, 53, 5, 0.5],
            [60, 61, 62, 63, 6, 0.6],
        ],
    ])

    expected_ys = [
        np.zeros([0, 6]),
        np.array([
            [40, 41, 42, 43, 4, 0.4],
        ]),
        np.array([
            [50, 51, 52, 53, 5, 0.5],
            [60, 61, 62, 63, 6, 0.6],
        ]),
    ]

    post_process = ExcludeLowScoreBox(threshold=threshold)
    ys = post_process(inputs)["outputs"]

    # zip() stops at the shorter sequence, so a truncated (or empty) result
    # would otherwise make the loop below pass without comparing anything.
    assert len(ys) == len(expected_ys)

    for expected_y, y in zip(expected_ys, ys):
        # np.allclose broadcasts its arguments: the empty (0, 6) expectation
        # would match a (1, 6) result vacuously, so pin the shape first.
        assert np.shape(y) == expected_y.shape
        assert np.allclose(expected_y, y)
def test_yolov2_post_process():
    """Check that YoloV2.post_process agrees with the standalone post-process pipeline.

    The same random array is fed to the model's graph-side ``post_process`` and
    to a ``Sequence`` of FormatYoloV2 -> ExcludeLowScoreBox -> NMS configured
    with the same parameters; the per-batch results must match numerically.
    """
    # The interactive session installs itself as the default session, which is
    # what ``Tensor.eval()`` below relies on. Close it when the test finishes so
    # it does not leak into whatever runs next.
    session = tf.InteractiveSession()
    try:
        image_size = [96, 64]
        batch_size = 2
        classes = Pascalvoc2007.classes
        anchors = [(0.1, 0.2), (1.2, 1.1)]
        data_format = "NHWC"
        score_threshold = 0.25
        nms_iou_threshold = 0.5

        model = YoloV2(
            image_size=image_size,
            batch_size=batch_size,
            classes=classes,
            anchors=anchors,
            data_format=data_format,
            score_threshold=score_threshold,
            nms_iou_threshold=nms_iou_threshold,
        )

        post_process = Sequence([
            FormatYoloV2(
                image_size=image_size,
                classes=classes,
                anchors=anchors,
                data_format=data_format,
            ),
            ExcludeLowScoreBox(threshold=score_threshold),
            NMS(
                iou_threshold=nms_iou_threshold,
                classes=classes,
            ),
        ])

        # NOTE(review): the channel dimension is placed second here although
        # data_format is "NHWC". Both pipelines receive the identical array, so
        # the comparison still holds -- confirm the layout is intentional.
        shape = (
            batch_size,
            len(anchors) * (len(classes) + 5),
            image_size[0] // 32,
            image_size[1] // 32,
        )
        np_output = np.random.uniform(-2., 2., size=shape).astype(np.float32)
        output = tf.constant(np_output)

        ys = model.post_process(output)
        expected_ys = post_process(outputs=np_output)["outputs"]

        # Evaluate every output once; the values are reused in the failure message.
        actual_ys = [y.eval() for y in ys]

        # zip() stops at the shorter sequence, so check the batch length
        # explicitly instead of letting missing outputs go unnoticed.
        assert len(actual_ys) == len(expected_ys)

        for actual_y, expected_y in zip(actual_ys, expected_ys):
            assert np.allclose(actual_y, expected_y), (actual_y, expected_y)
    finally:
        session.close()
PRETRAIN_FILE = "" PRE_PROCESSOR = Sequence([ResizeWithGtBoxes(size=IMAGE_SIZE), DivideBy255()]) anchors = [(1.3221, 1.73145), (3.19275, 4.00944), (5.05587, 8.09892), (9.47112, 4.84053), (11.2364, 10.0071)] score_threshold = 0.05 nms_iou_threshold = 0.5 nms_max_output_size = 100 POST_PROCESSOR = Sequence([ FormatYoloV2( image_size=IMAGE_SIZE, classes=CLASSES, anchors=anchors, data_format=DATA_FORMAT, ), ExcludeLowScoreBox(threshold=score_threshold), NMS( iou_threshold=nms_iou_threshold, max_output_size=nms_max_output_size, classes=CLASSES, ), ]) NETWORK = SmartDict() NETWORK.OPTIMIZER_CLASS = tf.compat.v1.train.MomentumOptimizer NETWORK.OPTIMIZER_KWARGS = {"momentum": 0.9} NETWORK.LEARNING_RATE_FUNC = tf.compat.v1.train.piecewise_constant _epoch_steps = 16551 // BATCH_SIZE NETWORK.LEARNING_RATE_KWARGS = { "values": [1e-6, 1e-4, 1e-5, 1e-6, 1e-7], "boundaries":
# TFDS pre-processing: TFDSResizeWithGtBoxes at IMAGE_SIZE, then TFDSPerImageStandardization.
TFDS_PRE_PROCESSOR = TFDSProcessorSequence([
    TFDSResizeWithGtBoxes(IMAGE_SIZE),
    TFDSPerImageStandardization(),
])

# Anchor pairs shared by the post-processor and the network settings below.
anchors = [(0.5, 0.25), (1.0, 0.75)]

# Post-processing chain: format the raw output, exclude boxes under the 0.05
# threshold, then run NMS with an IoU threshold of 0.5.
POST_PROCESSOR = Sequence([
    FormatYoloV2(image_size=IMAGE_SIZE, classes=CLASSES, anchors=anchors, data_format=DATA_FORMAT),
    ExcludeLowScoreBox(threshold=0.05),
    NMS(iou_threshold=0.5, classes=CLASSES),
])

# Network settings, declared in one mapping; EasyDict exposes each key as an attribute.
NETWORK = EasyDict({
    "OPTIMIZER_CLASS": tf.train.AdamOptimizer,
    "OPTIMIZER_KWARGS": {"learning_rate": 0.001},
    "IMAGE_SIZE": IMAGE_SIZE,
    "BATCH_SIZE": BATCH_SIZE,
    "DATA_FORMAT": DATA_FORMAT,
    "ANCHORS": anchors,
    "WEIGHT_DECAY_RATE": 0.0005,
    "ACTIVATION_QUANTIZER": linear_mid_tread_half_quantizer,
})