def test_lm_bisenet_post_process():
    """Verify LMBiSeNet.post_process() is the same as Bilinear and Softmax post process.

    A random float32 array at 1/8 of the image resolution is passed both through
    the network's ``post_process`` (as a TensorFlow constant) and through a
    ``Sequence`` of ``Bilinear`` + ``Softmax`` (as a NumPy array). The two
    results must agree within an absolute and relative tolerance of 1e-5.
    """
    # An interactive session installs itself as the default session, which
    # `Tensor.eval()` below relies on.
    tf.InteractiveSession()

    data_format = "NHWC"
    batch_size = 2
    image_size = [96, 64]
    classes = Camvid.classes

    network = LMBiSeNet(
        image_size=image_size,
        batch_size=batch_size,
        classes=classes,
        data_format=data_format,
    )

    reference_pipeline = Sequence([
        Bilinear(
            size=image_size,
            data_format=data_format,
            compatible_tensorflow_v1=True,
        ),
        Softmax(),
    ])

    height, width = image_size
    raw_shape = (batch_size, height // 8, width // 8, len(classes))
    raw_output = np.random.uniform(-10., 10., size=raw_shape).astype(np.float32)

    actual = network.post_process(tf.constant(raw_output))
    expected = reference_pipeline(outputs=raw_output)["outputs"]

    assert np.allclose(actual.eval(), expected, atol=1e-5, rtol=1e-5)
def test_ytfaces_facial_landmarks_detection():
    """Check the batches fed from YoutubeFacialLandmarks for both subsets.

    For the "train" and then the "validation" subset, five batches are fed and
    each one must yield NumPy arrays: images of size
    ``(batch_size, height, width, 3)`` and labels of size
    ``(batch_size, height // stride, width // stride, 68)``.
    """
    batch_size = 1
    image_size = [256, 320]
    stride = 2
    num_joints = 68

    pre_processor = Sequence([
        ResizeWithJoints(image_size=image_size),
        JointsToGaussianHeatmap(image_size=image_size, num_joints=num_joints, stride=stride),
    ])

    height, width = image_size
    expected_image_dims = (batch_size, height, width, 3)
    expected_label_dims = (batch_size, height // stride, width // stride, num_joints)

    # The same pre-processor instance is shared by both subsets.
    for subset in ("train", "validation"):
        dataset = YoutubeFacialLandmarks(subset=subset,
                                         batch_size=batch_size,
                                         pre_processor=pre_processor)
        dataset = DatasetIterator(dataset)

        for _ in range(5):
            images, labels = dataset.feed()

            assert isinstance(images, np.ndarray)
            # Only the first four axes are checked.
            assert tuple(images.shape[:4]) == expected_image_dims

            assert isinstance(labels, np.ndarray)
            assert tuple(labels.shape[:4]) == expected_label_dims
def _build_process(module, processor_config=None):
    """Build a ``Sequence`` of processors described by ``processor_config``.

    Args:
        module: Object searched first for each class name (via ``hasattr`` /
            ``getattr``).
        processor_config: Iterable of mappings ``{class_name: kwargs}``. A
            falsy value (``None``, empty list) yields an empty sequence.
            ``kwargs`` may be ``None`` or empty.

    Returns:
        ``Sequence(processors=[...])`` holding one instance per configured class.
    """
    if not processor_config:
        processor_config = []

    built = []
    for entry in processor_config:
        for class_name, kwargs in entry.items():
            # Prefer a class exposed by `module`; otherwise resolve the name
            # through `import_from_string`.
            cls = (
                getattr(module, class_name)
                if hasattr(module, class_name)
                else import_from_string(class_name)
            )

            # Allocate the instance without running `__init__`, then fill its
            # attributes directly from the configured values.
            instance = cls.__new__(cls)
            instance.__dict__.update(kwargs if kwargs else {})
            built.append(instance)

    return Sequence(processors=built)
def test_yolov2_post_process():
    """Verify YoloV2.post_process() matches the FormatYoloV2 -> ExcludeLowScoreBox -> NMS pipeline.

    A random float32 array is passed both through the network's
    ``post_process`` (as a TensorFlow constant) and through the equivalent
    ``Sequence`` of post-processors (as a NumPy array). The results are
    compared pairwise with ``np.allclose``.
    """
    # An interactive session installs itself as the default session, which
    # `Tensor.eval()` below relies on.
    tf.InteractiveSession()

    data_format = "NHWC"
    batch_size = 2
    image_size = [96, 64]
    classes = Pascalvoc2007.classes
    anchors = [(0.1, 0.2), (1.2, 1.1)]
    score_threshold = 0.25
    nms_iou_threshold = 0.5

    network = YoloV2(
        image_size=image_size,
        batch_size=batch_size,
        classes=classes,
        anchors=anchors,
        data_format=data_format,
        score_threshold=score_threshold,
        nms_iou_threshold=nms_iou_threshold,
    )

    reference_pipeline = Sequence([
        FormatYoloV2(
            image_size=image_size,
            classes=classes,
            anchors=anchors,
            data_format=data_format,
        ),
        ExcludeLowScoreBox(threshold=score_threshold),
        NMS(
            iou_threshold=nms_iou_threshold,
            classes=classes,
        ),
    ])

    # NOTE(review): the channel axis is second here although data_format is
    # "NHWC" -- confirm this layout is intended.
    height, width = image_size
    channels = len(anchors) * (len(classes) + 5)
    raw_shape = (batch_size, channels, height // 32, width // 32)
    raw_output = np.random.uniform(-2., 2., size=raw_shape).astype(np.float32)

    actual_results = network.post_process(tf.constant(raw_output))
    expected_results = reference_pipeline(outputs=raw_output)["outputs"]

    for actual, wanted in zip(actual_results, expected_results):
        assert np.allclose(actual.eval(), wanted), (actual.eval(), wanted)
def test_training():
    """Test only no error raised.

    Builds a minimal keypoint-detection config for ``LmSinglePoseV1Quantize``
    on ``MscocoSinglePersonKeypoints`` (two training steps, batch size 2) and
    runs ``start_training`` on it.
    """
    config = EasyDict()

    # Top-level settings, applied in this exact order.
    top_level_settings = (
        ("NETWORK_CLASS", LmSinglePoseV1Quantize),
        ("DATASET_CLASS", MscocoSinglePersonKeypoints),
        ("IS_DEBUG", False),
        ("IMAGE_SIZE", [160, 160]),
        ("BATCH_SIZE", 2),
        ("TEST_STEPS", 1),
        ("MAX_STEPS", 2),
        ("SAVE_CHECKPOINT_STEPS", 1),
        ("KEEP_CHECKPOINT_MAX", 5),
        ("SUMMARISE_STEPS", 1),
        ("IS_PRETRAIN", False),
        ("IS_DISTRIBUTION", False),
        ("TASK", Tasks.KEYPOINT_DETECTION),
    )
    for name, value in top_level_settings:
        setattr(config, name, value)

    # network model config
    config.NETWORK = EasyDict()
    network = config.NETWORK
    network.OPTIMIZER_CLASS = tf.train.AdamOptimizer
    network.OPTIMIZER_KWARGS = {"learning_rate": 0.001}
    network.IMAGE_SIZE = config.IMAGE_SIZE
    network.BATCH_SIZE = config.BATCH_SIZE
    network.ACTIVATION_QUANTIZER = linear_mid_tread_half_quantizer
    network.ACTIVATION_QUANTIZER_KWARGS = {
        'bit': 2,
        'max_value': 2.0
    }
    network.WEIGHT_QUANTIZER = binary_channel_wise_mean_scaling_quantizer
    network.WEIGHT_QUANTIZER_KWARGS = {}

    # dataset config
    config.DATASET = EasyDict()
    dataset = config.DATASET
    dataset.PRE_PROCESSOR = Sequence([
        ResizeWithJoints(image_size=config.IMAGE_SIZE),
        JointsToGaussianHeatmap(image_size=config.IMAGE_SIZE, stride=2),
        DivideBy255(),
    ])
    dataset.BATCH_SIZE = config.BATCH_SIZE

    environment.init("test_lm_single_pose_v1")
    prepare_dirs(recreate=True)
    start_training(config, profile_step=1)
def test_sequence():
    """Feed five Pascalvoc2007 batches through a flip/crop augmentor sequence.

    Each batch is handed to ``_show_images_with_boxes``; the test makes no
    assertions of its own.
    """
    image_size = [256, 512]
    batch_size = 3

    augmentor = Sequence([
        FlipLeftRight(),
        FlipTopBottom(),
        SSDRandomCrop(),
    ])
    resize = ResizeWithGtBoxes(image_size)

    iterator = DatasetIterator(
        Pascalvoc2007(
            batch_size=batch_size,
            pre_processor=resize,
            augmentor=augmentor,
        )
    )

    for _ in range(5):
        images, labels = iterator.feed()
        _show_images_with_boxes(images, labels)
def build_post_process(post_processor_config):
    """Build a ``Sequence`` of post-processors from a config description.

    Args:
        post_processor_config: Iterable of mappings ``{class_name: attributes}``
            or ``None``. Each ``class_name`` is looked up on the module loaded
            from "blueoil/post_processor". ``attributes`` may be ``None``.

    Returns:
        ``Sequence(processors=[...])`` with one instance per configured class.

    Raises:
        AttributeError: If a class name is not defined in the loaded module.
    """
    # NOTE(review): `imp` has been deprecated since Python 3.4 and was removed
    # in Python 3.12; consider migrating to `importlib` once the interpreter
    # requirement allows it.
    module_name = "blueoil/post_processor"
    module = imp.load_module(module_name, *imp.find_module(module_name))

    entries = {} if post_processor_config is None else post_processor_config

    processors = []
    for entry in entries:
        for class_name, attributes in entry.items():
            if attributes is None:
                attributes = {}

            cls = getattr(module, class_name)

            # Create the instance without calling `__init__`, then set its
            # members directly from the configured attributes.
            processor = cls.__new__(cls)
            processor.__dict__.update(attributes)

            processors.append(processor)

    return Sequence(processors=processors)
self.prefetcher.terminate = True self.prefetcher.pool.close() self.prefetcher.pool.join() if __name__ == '__main__': from blueoil.datasets.cifar10 import Cifar10 from blueoil.data_processor import Sequence from blueoil.data_augmentor import FlipLeftRight, Hue, Blur cifar10 = Cifar10() augmentor = Sequence([ FlipLeftRight(0.5), Hue((-10, 10)), Blur(), ]) dataset_iterator = DatasetIterator(dataset=cifar10, enable_prefetch=True, augmentor=augmentor) time.sleep(2) import time t0 = time.time() data_batch = next(dataset_iterator) t1 = time.time() print("time of prefetch: {}".format(t1 - t0)) dataset_iterator2 = DatasetIterator(dataset=cifar10, enable_prefetch=False,
SUMMARISE_STEPS = 1000 # pretrain IS_PRETRAIN = False PRETRAIN_VARS = [] PRETRAIN_DIR = "" PRETRAIN_FILE = "" # for debug # BATCH_SIZE = 2 # SUMMARISE_STEPS = 1 # IS_DEBUG = True PRE_PROCESSOR = Sequence([ PerImageStandardization(), ]) POST_PROCESSOR = None NETWORK = SmartDict() NETWORK.OPTIMIZER_CLASS = tf.compat.v1.train.AdamOptimizer NETWORK.OPTIMIZER_KWARGS = {"learning_rate": 0.001} NETWORK.IMAGE_SIZE = IMAGE_SIZE NETWORK.BATCH_SIZE = BATCH_SIZE NETWORK.DATA_FORMAT = DATA_FORMAT DATASET = SmartDict() DATASET.BATCH_SIZE = BATCH_SIZE DATASET.DATA_FORMAT = DATA_FORMAT DATASET.PRE_PROCESSOR = PRE_PROCESSOR
PRETRAIN_VARS = []
PRETRAIN_DIR = ""
PRETRAIN_FILE = ""

# for debug
# MAX_STEPS = 10
# BATCH_SIZE = 31
# SAVE_CHECKPOINT_STEPS = 2
# KEEP_CHECKPOINT_MAX = 5
# TEST_STEPS = 10
# SUMMARISE_STEPS = 2
# IS_DEBUG = True

# Pre-processing pipeline: resize to IMAGE_SIZE, then per-image standardization.
PRE_PROCESSOR = Sequence([
    Resize(size=IMAGE_SIZE),
    PerImageStandardization(),
])
# No post-processor is configured.
POST_PROCESSOR = None

# Network settings: momentum optimizer driven by a piecewise-constant
# learning-rate schedule.
NETWORK = EasyDict()
NETWORK.OPTIMIZER_CLASS = tf.compat.v1.train.MomentumOptimizer
NETWORK.OPTIMIZER_KWARGS = {"momentum": 0.9}
NETWORK.LEARNING_RATE_FUNC = tf.compat.v1.train.piecewise_constant
# NOTE(review): 50000 is presumably the number of training samples -- confirm
# against the dataset.
step_per_epoch = 50000 // BATCH_SIZE
# Five learning-rate values separated by four step boundaries (after 1, 50,
# 100 and 198 multiples of `step_per_epoch`).
NETWORK.LEARNING_RATE_KWARGS = {
    "values": [0.01, 0.1, 0.01, 0.001, 0.0001],
    "boundaries": [step_per_epoch, step_per_epoch * 50, step_per_epoch * 100, step_per_epoch * 198],
}
NETWORK.IMAGE_SIZE = IMAGE_SIZE
NETWORK.BATCH_SIZE = BATCH_SIZE
NETWORK.DATA_FORMAT = DATA_FORMAT
# TFDS pre-processing pipeline: resize (with ground-truth boxes) to IMAGE_SIZE,
# then per-image standardization.
TFDS_PRE_PROCESSOR = TFDSProcessorSequence(
    [TFDSResizeWithGtBoxes(IMAGE_SIZE), TFDSPerImageStandardization()])

# Anchor pairs shared by the post-processor (FormatYoloV2) and the network
# (NETWORK.ANCHORS).
anchors = [
    (0.5, 0.25), (1.0, 0.75),
]

# Post-processing pipeline: format the YOLOv2 output, drop boxes scoring below
# 0.05, then apply NMS with an IoU threshold of 0.5.
POST_PROCESSOR = Sequence([
    FormatYoloV2(
        image_size=IMAGE_SIZE,
        classes=CLASSES,
        anchors=anchors,
        data_format=DATA_FORMAT,
    ),
    ExcludeLowScoreBox(threshold=0.05),
    NMS(
        iou_threshold=0.5,
        classes=CLASSES,
    ),
])

# Network settings: Adam optimizer with a fixed learning rate; image size,
# batch size and data format mirror the module-level values.
NETWORK = EasyDict()
NETWORK.OPTIMIZER_CLASS = tf.train.AdamOptimizer
NETWORK.OPTIMIZER_KWARGS = {"learning_rate": 0.001}
NETWORK.IMAGE_SIZE = IMAGE_SIZE
NETWORK.BATCH_SIZE = BATCH_SIZE
NETWORK.DATA_FORMAT = DATA_FORMAT
NETWORK.ANCHORS = anchors
NETWORK.WEIGHT_DECAY_RATE = 0.0005
MAX_EPOCHS = 400 SAVE_CHECKPOINT_STEPS = 1000 KEEP_CHECKPOINT_MAX = 1 TEST_STEPS = 1000 SUMMARISE_STEPS = 10000 # pretrain IS_PRETRAIN = False PRETRAIN_VARS = [] PRETRAIN_DIR = "" PRETRAIN_FILE = "" PRE_PROCESSOR = Sequence([ ResizeWithGtBoxes(size=IMAGE_SIZE), PerImageStandardization() ]) anchors = [ (1.3221, 1.73145), (3.19275, 4.00944), (5.05587, 8.09892), (9.47112, 4.84053), (11.2364, 10.0071) ] score_threshold = 0.05 nms_iou_threshold = 0.5 nms_max_output_size = 100 POST_PROCESSOR = Sequence([ FormatYoloV2( image_size=IMAGE_SIZE, classes=CLASSES, anchors=anchors, data_format=DATA_FORMAT, ), ExcludeLowScoreBox(threshold=score_threshold),
'ship', 'truck' ] MAX_EPOCHS = 100 SAVE_CHECKPOINT_STEPS = 1000 KEEP_CHECKPOINT_MAX = 1 TEST_STEPS = 1000 SUMMARISE_STEPS = 10000 # pretrain IS_PRETRAIN = False PRETRAIN_VARS = [] PRETRAIN_DIR = "" PRETRAIN_FILE = "" PRE_PROCESSOR = Sequence([Resize(size=IMAGE_SIZE), PerImageStandardization()]) POST_PROCESSOR = None NETWORK = EasyDict() NETWORK.OPTIMIZER_CLASS = tf.compat.v1.train.MomentumOptimizer NETWORK.OPTIMIZER_KWARGS = {'momentum': 0.9} NETWORK.LEARNING_RATE_FUNC = tf.compat.v1.train.piecewise_constant NETWORK.LEARNING_RATE_KWARGS = { 'values': [0.001, 0.0001, 1e-05, 1e-06], 'boundaries': [25781, 51562, 77343] } NETWORK.IMAGE_SIZE = IMAGE_SIZE NETWORK.BATCH_SIZE = BATCH_SIZE NETWORK.DATA_FORMAT = DATA_FORMAT
IS_PRETRAIN = False PRETRAIN_VARS = [] PRETRAIN_DIR = "" PRETRAIN_FILE = "" # for debug # BATCH_SIZE = 2 # SUMMARISE_STEPS = 1 # IS_DEBUG = True # stride of output heatmap. the smaller, the slower. STRIDE = 8 PRE_PROCESSOR = Sequence([ ResizeWithJoints(image_size=IMAGE_SIZE), JointsToGaussianHeatmap(image_size=IMAGE_SIZE, stride=STRIDE, sigma=2), DivideBy255() ]) POST_PROCESSOR = Sequence([ GaussianHeatmapToJoints(num_dimensions=2, stride=STRIDE, confidence_threshold=0.1) ]) step_per_epoch = 149813 // BATCH_SIZE NETWORK = EasyDict() NETWORK.OPTIMIZER_CLASS = tf.compat.v1.train.AdamOptimizer NETWORK.OPTIMIZER_KWARGS = {} NETWORK.LEARNING_RATE_FUNC = tf.compat.v1.train.piecewise_constant NETWORK.LEARNING_RATE_KWARGS = {
IS_DEBUG = False

IMAGE_SIZE = [448, 448]
BATCH_SIZE = 4
DATA_FORMAT = "NHWC"
TASK = Tasks.CLASSIFICATION
# Class list is taken from the dataset class.
CLASSES = DATASET_CLASS.classes

# for debug
# MAX_STEPS = 100
# SAVE_CHECKPOINT_STEPS = 100
# TEST_STEPS = 10
# SUMMARISE_STEPS = 100
# IS_PRETRAIN = False
# IS_DEBUG = True

# Pre-processing pipeline; also attached to the dataset config below as
# DATASET.PRE_PROCESSOR.
PRE_PROCESSOR = Sequence([Resize(size=IMAGE_SIZE), DivideBy255()])
# No post-processor is configured.
POST_PROCESSOR = None

# Network settings mirror the module-level image size, batch size and data format.
NETWORK = SmartDict()
NETWORK.IMAGE_SIZE = IMAGE_SIZE
NETWORK.BATCH_SIZE = BATCH_SIZE
NETWORK.DATA_FORMAT = DATA_FORMAT

# dataset
DATASET = SmartDict()
DATASET.BATCH_SIZE = BATCH_SIZE
DATASET.DATA_FORMAT = DATA_FORMAT
DATASET.PRE_PROCESSOR = PRE_PROCESSOR
SUMMARISE_STEPS = 1000 # pretrain IS_PRETRAIN = False PRETRAIN_VARS = [] PRETRAIN_DIR = "" PRETRAIN_FILE = "" # for debug # BATCH_SIZE = 2 # SUMMARISE_STEPS = 1 # IS_DEBUG = True PRE_PROCESSOR = Sequence([ DivideBy255(), ]) POST_PROCESSOR = None NETWORK = EasyDict() NETWORK.OPTIMIZER_CLASS = tf.compat.v1.train.AdamOptimizer NETWORK.OPTIMIZER_KWARGS = {"learning_rate": 0.001} NETWORK.IMAGE_SIZE = IMAGE_SIZE NETWORK.BATCH_SIZE = BATCH_SIZE NETWORK.DATA_FORMAT = DATA_FORMAT NETWORK.ACTIVATION_QUANTIZER = linear_mid_tread_half_quantizer NETWORK.ACTIVATION_QUANTIZER_KWARGS = { 'bit': 2, 'max_value': 2 } NETWORK.WEIGHT_QUANTIZER = binary_mean_scaling_quantizer
IMAGE_SIZE = [416, 416] BATCH_SIZE = 8 DATA_FORMAT = "NCHW" TASK = Tasks.OBJECT_DETECTION CLASSES = DATASET_CLASS.classes # for debug # MAX_STEPS = 100 # SAVE_CHECKPOINT_STEPS = 100 # TEST_STEPS = 10 # SUMMARISE_STEPS = 100 # IS_PRETRAIN = False # IS_DEBUG = True PRE_PROCESSOR = Sequence([ ResizeWithGtBoxes(size=IMAGE_SIZE), DivideBy255(), ]) anchors = [(1.3221, 1.73145), (3.19275, 4.00944), (5.05587, 8.09892), (9.47112, 4.84053), (11.2364, 10.0071)] score_threshold = 0.005 nms_iou_threshold = 0.55 nms_max_output_size = 100 POST_PROCESSOR = Sequence([ FormatYoloV2( image_size=IMAGE_SIZE, classes=CLASSES, anchors=anchors, data_format=DATA_FORMAT, ), ExcludeLowScoreBox(threshold=score_threshold), NMS(
SUMMARISE_STEPS = 1000 # pretrain IS_PRETRAIN = False PRETRAIN_VARS = [] PRETRAIN_DIR = "" PRETRAIN_FILE = "" # for debug # BATCH_SIZE = 2 # SUMMARISE_STEPS = 1 # IS_DEBUG = True PRE_PROCESSOR = Sequence([ Resize(size=IMAGE_SIZE), PerImageStandardization(), ]) POST_PROCESSOR = Sequence([ Bilinear(size=IMAGE_SIZE, data_format=DATA_FORMAT, compatible_tensorflow_v1=True), Softmax(), ]) NETWORK = SmartDict() NETWORK.OPTIMIZER_CLASS = tf.compat.v1.train.AdamOptimizer NETWORK.OPTIMIZER_KWARGS = {"learning_rate": 0.001} NETWORK.IMAGE_SIZE = IMAGE_SIZE NETWORK.BATCH_SIZE = BATCH_SIZE NETWORK.DATA_FORMAT = DATA_FORMAT NETWORK.WEIGHT_DECAY_RATE = 0. NETWORK.AUXILIARY_LOSS_WEIGHT = 0.5
"conv2/kernel:", "conv2/bias:", "conv3/kernel:", "conv3/bias:", "conv4/kernel:", "conv4/bias:", "conv5/kernel:", "conv5/bias:", "conv6/kernel:", "conv6/bias:", ] PRETRAIN_DIR = "saved/lmnet_0.01_caltech101/checkpoints" PRETRAIN_FILE = "save.ckpt-99001" PRE_PROCESSOR = Sequence([ Resize(size=IMAGE_SIZE), PerImageStandardization() ]) POST_PROCESSOR = None NETWORK = SmartDict() NETWORK.OPTIMIZER_CLASS = tf.compat.v1.train.AdamOptimizer NETWORK.OPTIMIZER_KWARGS = {"learning_rate": 0.001} NETWORK.IMAGE_SIZE = IMAGE_SIZE NETWORK.BATCH_SIZE = BATCH_SIZE NETWORK.DATA_FORMAT = DATA_FORMAT NETWORK.WEIGHT_DECAY_RATE = 0.0005 NETWORK.ACTIVATION_QUANTIZER = linear_mid_tread_half_quantizer NETWORK.ACTIVATION_QUANTIZER_KWARGS = { 'bit': 2, 'max_value': 2 }
CLASSES = DATASET_CLASS.classes MAX_STEPS = 100000 SAVE_CHECKPOINT_STEPS = 5000 KEEP_CHECKPOINT_MAX = 5 TEST_STEPS = 1000 SUMMARISE_STEPS = 100 # pretrain IS_PRETRAIN = False PRETRAIN_VARS = [] PRETRAIN_DIR = "" PRETRAIN_FILE = "" PRE_PROCESSOR = Sequence([ Resize(size=IMAGE_SIZE), DivideBy255(), ]) POST_PROCESSOR = None STEP_PER_EPOCH = 50000 // BATCH_SIZE TUNE_SPEC = { 'run': 'tunable', 'resources_per_trial': { "cpu": 2, "gpu": 0.5 }, 'stop': { 'mean_accuracy': 0.87, 'training_iteration': 200, },
CLASSES = DATASET_CLASS.classes MAX_STEPS = 2 SAVE_CHECKPOINT_STEPS = 1 KEEP_CHECKPOINT_MAX = 5 TEST_STEPS = 100 SUMMARISE_STEPS = 100 # pretrain IS_PRETRAIN = False PRETRAIN_VARS = [] PRETRAIN_DIR = "" PRETRAIN_FILE = "" PRE_PROCESSOR = Sequence( [ResizeWithGtBoxes(IMAGE_SIZE), PerImageStandardization()]) anchors = [ (0.5, 0.25), (1.0, 0.75), ] POST_PROCESSOR = Sequence([ FormatYoloV2( image_size=IMAGE_SIZE, classes=CLASSES, anchors=anchors, data_format=DATA_FORMAT, ), ExcludeLowScoreBox(threshold=0.05), NMS(
TASK = Tasks.OBJECT_DETECTION CLASSES = DATASET_CLASS.classes KEEP_CHECKPOINT_MAX = 5 MAX_EPOCHS = 100 SAVE_CHECKPOINT_STEPS = 100 TEST_STEPS = 100 SUMMARISE_STEPS = 10 # pretrain IS_PRETRAIN = False PRETRAIN_VARS = [] PRETRAIN_DIR = "" PRETRAIN_FILE = "" PRE_PROCESSOR = Sequence([ResizeWithGtBoxes(size=IMAGE_SIZE), DivideBy255()]) anchors = [(1.3221, 1.73145), (3.19275, 4.00944), (5.05587, 8.09892), (9.47112, 4.84053), (11.2364, 10.0071)] score_threshold = 0.05 nms_iou_threshold = 0.5 nms_max_output_size = 100 POST_PROCESSOR = Sequence([ FormatYoloV2( image_size=IMAGE_SIZE, classes=CLASSES, anchors=anchors, data_format=DATA_FORMAT, ), ExcludeLowScoreBox(threshold=score_threshold), NMS( iou_threshold=nms_iou_threshold,
# pretrain IS_PRETRAIN = False PRETRAIN_VARS = [] PRETRAIN_DIR = "" PRETRAIN_FILE = "" # for debug # MAX_STEPS = 10 # BATCH_SIZE = 31 # SAVE_CHECKPOINT_STEPS = 2 # TEST_STEPS = 10 # SUMMARISE_STEPS = 2 # IS_DEBUG = True PRE_PROCESSOR = Sequence([Resize(size=IMAGE_SIZE), DivideBy255()]) POST_PROCESSOR = None NETWORK = EasyDict() NETWORK.OPTIMIZER_CLASS = tf.compat.v1.train.MomentumOptimizer NETWORK.OPTIMIZER_KWARGS = {"momentum": 0.9} NETWORK.LEARNING_RATE_FUNC = tf.compat.v1.train.polynomial_decay # TODO(wakiska): It is same as original yolov2 paper (batch size = 128). NETWORK.LEARNING_RATE_KWARGS = { "learning_rate": 1e-1, "decay_steps": 1600000, "power": 4.0, "end_learning_rate": 0.0 } NETWORK.IMAGE_SIZE = IMAGE_SIZE NETWORK.BATCH_SIZE = BATCH_SIZE