def test_nms():
    """Check NMS output against hand-written expectations for three box lists.

    What the expected arrays encode:

    * first list: of the three class-1 boxes only the one with the highest
      last-column value (0.3) is kept, and the single class-2 box is kept;
    * second list: all four boxes are kept, including the two that share
      coordinates but carry different class ids (2 and 3);
    * third list: two class-2 boxes are kept, ordered 0.7 before 0.6, and
      the remaining 0.6 box is dropped.
    """
    iou_threshold = 0.4
    classes = range(5)
    per_class = True

    # Rows are presumably [x, y, w, h, class_id, score] -- TODO confirm
    # against the box layout NMS expects.
    inputs = [
        np.array([
            [10, 11, 12, 13, 1, 0.1],
            [11, 12, 13, 14, 1, 0.2],
            [12, 13, 14, 15, 1, 0.3],
            [80, 81, 22, 23, 2, 0.2],
        ]),
        np.array([
            [80, 81, 22, 23, 2, 0.1],
            [30, 31, 32, 33, 3, 0.3],
            [80, 81, 22, 23, 3, 0.2],
            [30, 31, 32, 33, 4, 0.4],
        ]),
        np.array([
            [60, 61, 62, 63, 2, 0.6],
            [82, 22, 32, 32, 2, 0.7],
            [83, 23, 33, 33, 2, 0.6],
        ]),
    ]

    expected_ys = [
        np.array([
            [12, 13, 14, 15, 1, 0.3],
            [80, 81, 22, 23, 2, 0.2],
        ]),
        np.array([
            [80, 81, 22, 23, 2, 0.1],
            [30, 31, 32, 33, 3, 0.3],
            [80, 81, 22, 23, 3, 0.2],
            [30, 31, 32, 33, 4, 0.4],
        ]),
        np.array([
            [82, 22, 32, 32, 2, 0.7],
            [60, 61, 62, 63, 2, 0.6],
        ]),
    ]

    post_process = NMS(
        classes=classes,
        iou_threshold=iou_threshold,
        per_class=per_class,
    )
    ys = post_process(inputs)["outputs"]

    # zip() stops at the shorter sequence, so without this check a result
    # with missing entries would pass without those entries being compared.
    assert len(ys) == len(expected_ys), (len(ys), len(expected_ys))

    for expected_y, y in zip(expected_ys, ys):
        assert np.allclose(expected_y, y), (expected_y, y)
def test_yolov2_post_process():
    """Compare YoloV2.post_process with the equivalent processor pipeline.

    The same random array is fed to ``model.post_process`` (as a TensorFlow
    constant) and to a ``Sequence`` of ``FormatYoloV2``,
    ``ExcludeLowScoreBox`` and ``NMS`` (as a NumPy array); the per-item
    results of both paths must be numerically close.
    """
    # The interactive session installs itself as the default session, which
    # is what the argument-less ``y.eval()`` calls below rely on.  It is
    # closed in ``finally`` so it does not leak into tests that run later.
    session = tf.InteractiveSession()
    try:
        image_size = [96, 64]
        batch_size = 2
        classes = Pascalvoc2007.classes
        anchors = [(0.1, 0.2), (1.2, 1.1)]
        data_format = "NHWC"
        score_threshold = 0.25
        nms_iou_threshold = 0.5

        model = YoloV2(
            image_size=image_size,
            batch_size=batch_size,
            classes=classes,
            anchors=anchors,
            data_format=data_format,
            score_threshold=score_threshold,
            nms_iou_threshold=nms_iou_threshold,
        )

        post_process = Sequence([
            FormatYoloV2(
                image_size=image_size,
                classes=classes,
                anchors=anchors,
                data_format=data_format,
            ),
            ExcludeLowScoreBox(threshold=score_threshold),
            NMS(
                iou_threshold=nms_iou_threshold,
                classes=classes,
            ),
        ])

        # NOTE(review): the anchors * (classes + 5) dimension is placed
        # second although data_format is "NHWC" -- confirm this is intended.
        # The divisor 32 is presumably the network's downsampling factor.
        shape = (
            batch_size,
            len(anchors) * (len(classes) + 5),
            image_size[0] // 32,
            image_size[1] // 32,
        )
        np_output = np.random.uniform(-2., 2., size=shape).astype(np.float32)
        output = tf.constant(np_output)

        ys = model.post_process(output)
        expected_ys = post_process(outputs=np_output)["outputs"]

        for y, expected_y in zip(ys, expected_ys):
            # Evaluate once and reuse the value for the failure message.
            actual_y = y.eval()
            assert np.allclose(actual_y, expected_y), (actual_y, expected_y)
    finally:
        session.close()
# Anchor pairs handed to FormatYoloV2 below.
# NOTE(review): units are presumably grid cells -- confirm.
anchors = [
    (1.3221, 1.73145),
    (3.19275, 4.00944),
    (5.05587, 8.09892),
    (9.47112, 4.84053),
    (11.2364, 10.0071),
]

# Thresholds shared by the post-processing stages.
score_threshold = 0.05
nms_iou_threshold = 0.5
nms_max_output_size = 100

# Post-processing chain: format the raw output, drop low-score boxes, run NMS.
POST_PROCESSOR = Sequence([
    FormatYoloV2(
        anchors=anchors,
        classes=CLASSES,
        image_size=IMAGE_SIZE,
        data_format=DATA_FORMAT,
    ),
    ExcludeLowScoreBox(threshold=score_threshold),
    NMS(
        classes=CLASSES,
        iou_threshold=nms_iou_threshold,
        max_output_size=nms_max_output_size,
    ),
])

NETWORK = SmartDict()
NETWORK.OPTIMIZER_CLASS = tf.compat.v1.train.MomentumOptimizer
NETWORK.OPTIMIZER_KWARGS = {"momentum": 0.9}
NETWORK.LEARNING_RATE_FUNC = tf.compat.v1.train.piecewise_constant

# Steps per epoch; 16551 is presumably the training-set size -- TODO confirm.
_epoch_steps = 16551 // BATCH_SIZE

# Five learning-rate values separated by four boundaries, placed after
# 1, 10, 60 and 90 epochs' worth of steps.
NETWORK.LEARNING_RATE_KWARGS = {
    "values": [1e-6, 1e-4, 1e-5, 1e-6, 1e-7],
    "boundaries": [_epoch_steps * epochs for epochs in (1, 10, 60, 90)],
}
NETWORK.IMAGE_SIZE = IMAGE_SIZE
TFDSPerImageStandardization()])

# Anchor pairs shared by the post-processor and the network settings below.
anchors = [
    (0.5, 0.25), (1.0, 0.75),
]

# Post-processing chain: format the raw output, drop boxes scoring below
# 0.05, then run NMS with an IoU threshold of 0.5.
POST_PROCESSOR = Sequence([
    FormatYoloV2(
        image_size=IMAGE_SIZE,
        classes=CLASSES,
        anchors=anchors,
        data_format=DATA_FORMAT,
    ),
    ExcludeLowScoreBox(threshold=0.05),
    NMS(
        iou_threshold=0.5,
        classes=CLASSES,
    ),
])

# Network / training settings collected on an attribute-style dict.
NETWORK = EasyDict()
NETWORK.OPTIMIZER_CLASS = tf.train.AdamOptimizer
NETWORK.OPTIMIZER_KWARGS = {"learning_rate": 0.001}
NETWORK.IMAGE_SIZE = IMAGE_SIZE
NETWORK.BATCH_SIZE = BATCH_SIZE
NETWORK.DATA_FORMAT = DATA_FORMAT
NETWORK.ANCHORS = anchors
NETWORK.WEIGHT_DECAY_RATE = 0.0005
# Quantizer selection; the kwargs are presumably forwarded to the quantizer
# callables named here -- confirm against the network implementation.
NETWORK.ACTIVATION_QUANTIZER = linear_mid_tread_half_quantizer
NETWORK.ACTIVATION_QUANTIZER_KWARGS = {'bit': 2, 'max_value': 2}
NETWORK.WEIGHT_QUANTIZER = binary_mean_scaling_quantizer
NETWORK.WEIGHT_QUANTIZER_KWARGS = {}