# rotation U([-17 deg, +17 deg])
# scaling U([0.9, 2.0])
# Pixel-Wise transformation
# Gaussian noise N(0, 1) * U([0.0, 0.04 * (255)])
# contrast U([0.2, 1.4])
# color U([0.5, 2.0])
# gamma U([0.7, 1.5])
# brightness 1 + 0.2 * N(0, 1)
# NOTE (by KI-42): in this setup the augmentation recipe described above
# was modified a little bit:
# hue U([-128 deg, 128 deg])
# brightness U(0.6, 1.4)

# Augmentation pipeline: geometric stages are listed first, pixel-wise
# stages second.  Hue and GaussianBlur are left commented out.
DATASET.AUGMENTOR = Sequence([
    # Geometric transformation
    FlipLeftRight(0.5),
    FlipTopBottom(0.5),
    Translate(-0.2, 0.2),
    Rotate(-17, 17),
    Scale(1.0, 2.0),

    # Pixel-wise augmentation
    Brightness(0.6, 1.4),
    Contrast(0.2, 1.4),
    Color(0.5, 2.0),
    Gamma(0.7, 1.5),
    # Hue(-128.0, 128.0),
    GaussianNoise(10.0),
    # GaussianBlur(0.0, 2.0)
])

# The dataset uses the module-level PRE_PROCESSOR.
DATASET.PRE_PROCESSOR = PRE_PROCESSOR
# Pre-processing chain built from Resize and PerImageStandardization;
# no post-processing is configured.
PRE_PROCESSOR = Sequence([
    Resize(size=IMAGE_SIZE),
    PerImageStandardization(),
])
POST_PROCESSOR = None

# Network settings: momentum optimizer with a piecewise-constant
# learning-rate function.
NETWORK = EasyDict()
NETWORK.OPTIMIZER_CLASS = tf.train.MomentumOptimizer
NETWORK.OPTIMIZER_KWARGS = {"momentum": 0.9}
NETWORK.LEARNING_RATE_FUNC = tf.train.piecewise_constant
# Four rate values, separated by three step boundaries.
NETWORK.LEARNING_RATE_KWARGS = {
    "values": [0.1, 0.01, 0.001, 0.0001],
    "boundaries": [40000, 60000, 80000],
}
NETWORK.IMAGE_SIZE = IMAGE_SIZE
NETWORK.BATCH_SIZE = BATCH_SIZE
NETWORK.DATA_FORMAT = DATA_FORMAT
NETWORK.WEIGHT_DECAY_RATE = 0.0001

# Quantizers for activations and weights, with their keyword arguments.
NETWORK.ACTIVATION_QUANTIZER = linear_mid_tread_half_quantizer
NETWORK.ACTIVATION_QUANTIZER_KWARGS = {'bit': 2, 'max_value': 2}
NETWORK.WEIGHT_QUANTIZER = binary_mean_scaling_quantizer
NETWORK.WEIGHT_QUANTIZER_KWARGS = {}

# dataset
DATASET = EasyDict()
DATASET.BATCH_SIZE = BATCH_SIZE
DATASET.DATA_FORMAT = DATA_FORMAT
DATASET.PRE_PROCESSOR = PRE_PROCESSOR
# Training augmentation: Pad, Crop to IMAGE_SIZE, and FlipLeftRight.
DATASET.AUGMENTOR = Sequence([
    Pad(2),
    Crop(size=IMAGE_SIZE),
    FlipLeftRight(),
])
}, ]), } NETWORK = EasyDict() NETWORK.OPTIMIZER_CLASS = None NETWORK.OPTIMIZER_KWARGS = {} NETWORK.LEARNING_RATE_FUNC = None NETWORK.LEARNING_RATE_KWARGS = {} NETWORK.IMAGE_SIZE = IMAGE_SIZE NETWORK.BATCH_SIZE = BATCH_SIZE NETWORK.DATA_FORMAT = DATA_FORMAT NETWORK.ACTIVATION_QUANTIZER = linear_mid_tread_half_quantizer NETWORK.ACTIVATION_QUANTIZER_KWARGS = {'bit': 2, 'max_value': 2} NETWORK.WEIGHT_QUANTIZER = binary_mean_scaling_quantizer NETWORK.WEIGHT_QUANTIZER_KWARGS = {} DATASET = EasyDict() DATASET.BATCH_SIZE = BATCH_SIZE DATASET.DATA_FORMAT = DATA_FORMAT DATASET.PRE_PROCESSOR = PRE_PROCESSOR DATASET.AUGMENTOR = Sequence([ Brightness((0.75, 1.25)), Color((0.75, 1.25)), Contrast((0.75, 1.25)), FlipLeftRight(), Hue((-10, 10)), ]) DATASET.ENABLE_PREFETCH = False
} NETWORK.IMAGE_SIZE = IMAGE_SIZE NETWORK.BATCH_SIZE = BATCH_SIZE NETWORK.DATA_FORMAT = DATA_FORMAT NETWORK.ANCHORS = anchors NETWORK.OBJECT_SCALE = 5.0 NETWORK.NO_OBJECT_SCALE = 1.0 NETWORK.CLASS_SCALE = 1.0 NETWORK.COORDINATE_SCALE = 1.0 NETWORK.LOSS_IOU_THRESHOLD = 0.6 NETWORK.WEIGHT_DECAY_RATE = 0.0005 NETWORK.SCORE_THRESHOLD = score_threshold NETWORK.NMS_IOU_THRESHOLD = nms_iou_threshold NETWORK.NMS_MAX_OUTPUT_SIZE = nms_max_output_size NETWORK.LOSS_WARMUP_STEPS = int(8000 / BATCH_SIZE) # dataset DATASET = EasyDict() DATASET.BATCH_SIZE = BATCH_SIZE DATASET.DATA_FORMAT = DATA_FORMAT DATASET.PRE_PROCESSOR = PRE_PROCESSOR DATASET.AUGMENTOR = Sequence([ FlipLeftRight(), Brightness((0.75, 1.25)), Color((0.75, 1.25)), Contrast((0.75, 1.25)), Hue((-10, 10)), SSDRandomCrop(min_crop_ratio=0.7), ]) DATASET.ENABLE_PREFETCH = True
# pretrain
# Pretraining is switched off; the related variables, directory and file
# are left empty.
IS_PRETRAIN = False
PRETRAIN_VARS = []
PRETRAIN_DIR = ""
PRETRAIN_FILE = ""

# Pre-processing chain built from Resize and PerImageStandardization;
# no post-processing is configured.
PRE_PROCESSOR = Sequence([
    Resize(size=IMAGE_SIZE),
    PerImageStandardization(),
])
POST_PROCESSOR = None

# Network settings: Adam optimizer; the learning rate is passed directly
# as an optimizer keyword argument.
NETWORK = EasyDict()
NETWORK.OPTIMIZER_CLASS = tf.train.AdamOptimizer
NETWORK.OPTIMIZER_KWARGS = {"learning_rate": 0.001}
NETWORK.IMAGE_SIZE = IMAGE_SIZE
NETWORK.BATCH_SIZE = BATCH_SIZE
NETWORK.DATA_FORMAT = DATA_FORMAT
NETWORK.WEIGHT_DECAY_RATE = 0.0005

# Quantizers for activations and weights, with their keyword arguments.
NETWORK.ACTIVATION_QUANTIZER = linear_mid_tread_half_quantizer
NETWORK.ACTIVATION_QUANTIZER_KWARGS = {'bit': 2, 'max_value': 2}
NETWORK.WEIGHT_QUANTIZER = binary_mean_scaling_quantizer
NETWORK.WEIGHT_QUANTIZER_KWARGS = {}

# dataset
DATASET = EasyDict()
DATASET.BATCH_SIZE = BATCH_SIZE
DATASET.DATA_FORMAT = DATA_FORMAT
DATASET.PRE_PROCESSOR = PRE_PROCESSOR
# FlipLeftRight is the only augmentation.
DATASET.AUGMENTOR = Sequence([
    FlipLeftRight(),
])
# Optimizer keyword arguments and a piecewise-constant learning-rate
# function.
NETWORK.OPTIMIZER_KWARGS = {"beta1": 0.9, "beta2": 0.999}
NETWORK.LEARNING_RATE_FUNC = tf.train.piecewise_constant
# Five rate values (each half the previous one), separated by four step
# boundaries.
NETWORK.LEARNING_RATE_KWARGS = {
    "values": [0.0001, 0.00005, 0.000025, 0.0000125, 0.00000625],
    "boundaries": [400000, 600000, 800000, 1000000],
}
NETWORK.CONV_DEPTH = CONV_DEPTH
NETWORK.DIV_FLOW = 20.0
NETWORK.WEIGHT_DECAY_RATE = 0.0004
NETWORK.IMAGE_SIZE = IMAGE_SIZE
NETWORK.BATCH_SIZE = BATCH_SIZE
NETWORK.DATA_FORMAT = DATA_FORMAT

# dataset
DATASET = EasyDict()
DATASET.BATCH_SIZE = BATCH_SIZE
DATASET.DATA_FORMAT = DATA_FORMAT
DATASET.SLICE_STEP = SLICE_STEP

# Training-side loading options.
DATASET.TRAIN_ENABLE_PREFETCH = True
DATASET.TRAIN_PROCESS_NUM = 10
DATASET.TRAIN_QUEUE_SIZE = 1000

# Validation-side loading options.
DATASET.VALIDATION_ENABLE_PREFETCH = False
DATASET.VALIDATION_PRE_LOAD = False
DATASET.VALIDATION_PROCESS_NUM = 1
DATASET.VALIDATION_QUEUE_SIZE = 500
DATASET.VALIDATION_RATE = 0.1
DATASET.VALIDATION_SEED = 2019

# Augmentor and pre-processor reuse the module-level objects.
DATASET.AUGMENTOR = AUGMENTOR
DATASET.PRE_PROCESSOR = PRE_PROCESSOR
Resize(size=IMAGE_SIZE), {% if quantize_first_convolution %}DivideBy255(){% else %}PerImageStandardization(){% endif %} ])
POST_PROCESSOR = None

# Optimizer and learning-rate settings are substituted from template variables.
NETWORK = EasyDict()
NETWORK.OPTIMIZER_CLASS = {{ optimizer_class }}
NETWORK.OPTIMIZER_KWARGS = {{ optimizer_kwargs }}
NETWORK.LEARNING_RATE_FUNC = {{ learning_rate_func }}
NETWORK.LEARNING_RATE_KWARGS = {{ learning_rate_kwargs }}

NETWORK.IMAGE_SIZE = IMAGE_SIZE
NETWORK.BATCH_SIZE = BATCH_SIZE
NETWORK.DATA_FORMAT = DATA_FORMAT
# Quantizers for activations and weights, with their keyword arguments.
NETWORK.ACTIVATION_QUANTIZER = linear_mid_tread_half_quantizer
NETWORK.ACTIVATION_QUANTIZER_KWARGS = {
    'bit': 2,
    'max_value': 2
}
NETWORK.WEIGHT_QUANTIZER = binary_mean_scaling_quantizer
NETWORK.WEIGHT_QUANTIZER_KWARGS = {}

DATASET = EasyDict()
DATASET.BATCH_SIZE = BATCH_SIZE
DATASET.DATA_FORMAT = DATA_FORMAT
DATASET.PRE_PROCESSOR = PRE_PROCESSOR
# One augmentor call per data_augmentation entry: entry[0] is rendered as the
# callable name, entry[1] as its (keyword, value) pairs.
DATASET.AUGMENTOR = Sequence([{% if data_augmentation %}{% for aug in data_augmentation %}
    {{ aug[0] }}({% for arg_name, arg_value in aug[1] %}{{ arg_name }}={{ arg_value }}, {% endfor %}),{% endfor %}
{% endif %}])
DATASET.ENABLE_PREFETCH = {{ dataset_prefetch }}
NETWORK.WEIGHT_DECAY_RATE = 0.0005
# These three reuse lowercase names defined elsewhere in this module.
NETWORK.SCORE_THRESHOLD = score_threshold
NETWORK.NMS_IOU_THRESHOLD = nms_iou_threshold
NETWORK.NMS_MAX_OUTPUT_SIZE = nms_max_output_size
# 8000 divided by the batch size, truncated to an int.
NETWORK.LOSS_WARMUP_STEPS = int(8000 / BATCH_SIZE)

# quantize
NETWORK.ACTIVATION_QUANTIZER = linear_mid_tread_half_quantizer
NETWORK.ACTIVATION_QUANTIZER_KWARGS = {
    'bit': 2,
    'max_value': 2.0,
}
NETWORK.WEIGHT_QUANTIZER = binary_channel_wise_mean_scaling_quantizer
NETWORK.WEIGHT_QUANTIZER_KWARGS = {}
# Quantization is switched off for both the first and the last convolution.
NETWORK.QUANTIZE_FIRST_CONVOLUTION = False
NETWORK.QUANTIZE_LAST_CONVOLUTION = False

# dataset
DATASET = EasyDict()
DATASET.BATCH_SIZE = BATCH_SIZE
DATASET.DATA_FORMAT = DATA_FORMAT
DATASET.PRE_PROCESSOR = PRE_PROCESSOR
# Every augmentor is configured through explicit keyword arguments.
DATASET.AUGMENTOR = Sequence([
    Brightness(value=(0.75, 1.25)),
    Color(value=(0.75, 1.25)),
    FlipLeftRight(probability=0.5),
    Hue(value=(-10, 10)),
    SSDRandomCrop(min_crop_ratio=0.3),
])
DATASET.ENABLE_PREFETCH = True
# Network settings: Adam optimizer; the learning rate is passed directly
# as an optimizer keyword argument.
NETWORK = EasyDict()
NETWORK.OPTIMIZER_CLASS = tf.compat.v1.train.AdamOptimizer
NETWORK.OPTIMIZER_KWARGS = {"learning_rate": 0.001}
NETWORK.IMAGE_SIZE = IMAGE_SIZE
NETWORK.BATCH_SIZE = BATCH_SIZE
NETWORK.DATA_FORMAT = DATA_FORMAT
NETWORK.WEIGHT_DECAY_RATE = 0.

# Architecture switches and the auxiliary-loss weight.
NETWORK.AUXILIARY_LOSS_WEIGHT = 0.5
NETWORK.USE_FEATURE_FUSION = True
NETWORK.USE_ATTENTION_REFINEMENT = True
NETWORK.USE_TAIL_GAP = True

# Quantizers for activations and weights, with their keyword arguments.
NETWORK.ACTIVATION_QUANTIZER = linear_mid_tread_half_quantizer
NETWORK.ACTIVATION_QUANTIZER_KWARGS = {'bit': 2, 'max_value': 2}
NETWORK.WEIGHT_QUANTIZER = binary_channel_wise_mean_scaling_quantizer
NETWORK.WEIGHT_QUANTIZER_KWARGS = {}

# Dataset settings.
DATASET = EasyDict()
DATASET.BATCH_SIZE = BATCH_SIZE
DATASET.DATA_FORMAT = DATA_FORMAT
DATASET.PRE_PROCESSOR = PRE_PROCESSOR
# Resize is listed first in the augmentor, ahead of the colour and flip
# stages.
DATASET.AUGMENTOR = Sequence([
    Resize(size=IMAGE_SIZE),
    Brightness((0.75, 1.25)),
    Color((0.75, 1.25)),
    Contrast((0.75, 1.25)),
    FlipLeftRight(),
    Hue((-10, 10)),
])
DATASET.ENABLE_PREFETCH = True
# Steps per epoch; 149813 is presumably the number of training samples
# (TODO confirm against the dataset).
step_per_epoch = 149813 // BATCH_SIZE

# Network settings: Adam optimizer with a piecewise-constant learning-rate
# function.
NETWORK = EasyDict()
NETWORK.OPTIMIZER_CLASS = tf.compat.v1.train.AdamOptimizer
NETWORK.OPTIMIZER_KWARGS = {}
NETWORK.LEARNING_RATE_FUNC = tf.compat.v1.train.piecewise_constant
# The rate starts at 1e-4, rises to 1e-3 after step 5000, then drops at
# epoch 5 and epoch 10 (expressed in steps).
NETWORK.LEARNING_RATE_KWARGS = {
    "values": [1e-4, 1e-3, 1e-4, 1e-5],
    "boundaries": [5000, step_per_epoch * 5, step_per_epoch * 10],
}
NETWORK.STRIDE = STRIDE
NETWORK.IMAGE_SIZE = IMAGE_SIZE
NETWORK.BATCH_SIZE = BATCH_SIZE
NETWORK.DATA_FORMAT = DATA_FORMAT

# Quantizers for activations and weights, with their keyword arguments.
NETWORK.ACTIVATION_QUANTIZER = linear_mid_tread_half_quantizer
NETWORK.ACTIVATION_QUANTIZER_KWARGS = {'bit': 2, 'max_value': 2}
NETWORK.WEIGHT_QUANTIZER = binary_channel_wise_mean_scaling_quantizer
NETWORK.WEIGHT_QUANTIZER_KWARGS = {}

# Dataset settings.
DATASET = EasyDict()
DATASET.IMAGE_SIZE = IMAGE_SIZE
DATASET.BATCH_SIZE = BATCH_SIZE
DATASET.DATA_FORMAT = DATA_FORMAT
DATASET.PRE_PROCESSOR = PRE_PROCESSOR
# Colour-only augmentation; each stage receives a single (low, high) tuple.
DATASET.AUGMENTOR = Sequence([
    Brightness((0.75, 1.25)),
    Color((0.75, 1.25)),
    Contrast((0.75, 1.25)),
])
DATASET.ENABLE_PREFETCH = True
data_format=DATA_FORMAT, ), ExcludeLowScoreBox(threshold=0.05), NMS( iou_threshold=0.5, classes=CLASSES, ), ]) NETWORK = EasyDict() NETWORK.OPTIMIZER_CLASS = tf.train.AdamOptimizer NETWORK.OPTIMIZER_KWARGS = {"learning_rate": 0.001} NETWORK.IMAGE_SIZE = IMAGE_SIZE NETWORK.BATCH_SIZE = BATCH_SIZE NETWORK.DATA_FORMAT = DATA_FORMAT NETWORK.ANCHORS = anchors NETWORK.WEIGHT_DECAY_RATE = 0.0005 NETWORK.ACTIVATION_QUANTIZER = linear_mid_tread_half_quantizer NETWORK.ACTIVATION_QUANTIZER_KWARGS = {'bit': 2, 'max_value': 2} NETWORK.WEIGHT_QUANTIZER = binary_mean_scaling_quantizer NETWORK.WEIGHT_QUANTIZER_KWARGS = {} # dataset DATASET = EasyDict() DATASET.BATCH_SIZE = BATCH_SIZE DATASET.DATA_FORMAT = DATA_FORMAT DATASET.PRE_PROCESSOR = PRE_PROCESSOR DATASET.AUGMENTOR = Sequence([ FlipLeftRight(is_bounding_box=True), ])
# Optimizer keyword arguments and a piecewise-constant learning-rate
# function.
NETWORK.OPTIMIZER_KWARGS = {'momentum': 0.9}
NETWORK.LEARNING_RATE_FUNC = tf.compat.v1.train.piecewise_constant
# Four rate values (each a tenth of the previous one), separated by three
# step boundaries.
NETWORK.LEARNING_RATE_KWARGS = {
    'values': [0.001, 0.0001, 1e-05, 1e-06],
    'boundaries': [25781, 51562, 77343],
}
NETWORK.IMAGE_SIZE = IMAGE_SIZE
NETWORK.BATCH_SIZE = BATCH_SIZE
NETWORK.DATA_FORMAT = DATA_FORMAT
NETWORK.WEIGHT_DECAY_RATE = 0.0005

# quantize
NETWORK.ACTIVATION_QUANTIZER = linear_mid_tread_half_quantizer
NETWORK.ACTIVATION_QUANTIZER_KWARGS = {'bit': 2, 'max_value': 2}
NETWORK.WEIGHT_QUANTIZER = binary_mean_scaling_quantizer
NETWORK.WEIGHT_QUANTIZER_KWARGS = {}

# dataset
DATASET = EasyDict()
DATASET.BATCH_SIZE = BATCH_SIZE
DATASET.DATA_FORMAT = DATA_FORMAT
DATASET.PRE_PROCESSOR = PRE_PROCESSOR
# Every augmentor is configured through explicit keyword arguments.
DATASET.AUGMENTOR = Sequence([
    Brightness(value=(0.75, 1.25)),
    Color(value=(0.75, 1.25)),
    FlipLeftRight(probability=0.5),
    Hue(value=(-10, 10)),
])
DATASET.ENABLE_PREFETCH = True
Resize(size=IMAGE_SIZE), {% if quantize_first_convolution %}DivideBy255(){% else %}PerImageStandardization(){% endif %} ])
POST_PROCESSOR = None

# Optimizer and learning-rate settings are substituted from template variables.
NETWORK = EasyDict()
NETWORK.OPTIMIZER_CLASS = {{ optimizer_class }}
NETWORK.OPTIMIZER_KWARGS = {{ optimizer_kwargs }}
NETWORK.LEARNING_RATE_FUNC = {{ learning_rate_func }}
NETWORK.LEARNING_RATE_KWARGS = {{ learning_rate_kwargs }}

NETWORK.IMAGE_SIZE = IMAGE_SIZE
NETWORK.BATCH_SIZE = BATCH_SIZE
NETWORK.DATA_FORMAT = DATA_FORMAT
# Quantizers for activations and weights, with their keyword arguments.
NETWORK.ACTIVATION_QUANTIZER = linear_mid_tread_half_quantizer
NETWORK.ACTIVATION_QUANTIZER_KWARGS = {
    'bit': 2,
    'max_value': 2
}
NETWORK.WEIGHT_QUANTIZER = binary_mean_scaling_quantizer
NETWORK.WEIGHT_QUANTIZER_KWARGS = {}

DATASET = EasyDict()
DATASET.BATCH_SIZE = BATCH_SIZE
DATASET.DATA_FORMAT = DATA_FORMAT
DATASET.PRE_PROCESSOR = PRE_PROCESSOR
# One augmentor call per data_augmentation item: the key is rendered as the
# callable name, the value as its (keyword, value) pairs.
DATASET.AUGMENTOR = Sequence([{% if data_augmentation %}{% for op_name, op_params in data_augmentation.items() %}
    {{ op_name }}({% for arg_name, arg_value in op_params %}{{ arg_name }}={{ arg_value }}, {% endfor %}),{% endfor %}
{% endif %}])
DATASET.ENABLE_PREFETCH = {{ dataset_prefetch }}