def default_detection_configs(phi,
                              num_classes=90,
                              min_level=3,
                              max_level=7,
                              fpn_filters=64,
                              neck_repeats=3,
                              head_repeats=3,
                              anchor_scale=4,
                              num_scales=3,
                              batch_size=4,
                              image_size=512,
                              fpn_name="BiFPN",
                              fpn_input_dims=None,
                              fusion_type="weighted_sum"):
    """Build the default EfficientDet-D`phi` training/eval/test config.

    NOTE(review): another function with this same name is defined later in
    this file and shadows this one at import time — confirm which is intended.

    Args:
        phi: EfficientDet compound-scaling coefficient (selects backbone/dirs).
        num_classes: number of object classes.
        min_level, max_level: pyramid levels covered by anchors and the head.
        fpn_filters: channel width of neck and head.
        neck_repeats, head_repeats: repeat counts for BiFPN / head convs.
        anchor_scale: base anchor scale multiplier.
        num_scales: anchor scales per pyramid level.
        batch_size: per-step batch size for train/val datasets.
        image_size: square input resolution (or a (h, w) list/tuple).
        fpn_name: neck implementation name.
        fpn_input_dims: channel dims of the backbone features fed to the neck;
            defaults to [80, 192, 320] (was a mutable default argument).
        fusion_type: BiFPN feature-fusion mode.

    Returns:
        A populated ``Config`` object.
    """
    # Avoid the shared-mutable-default-argument pitfall.
    if fpn_input_dims is None:
        fpn_input_dims = [80, 192, 320]

    h = Config()
    h.detector = "EfficientDetD%d" % phi
    h.dtype = "float32"
    h.num_classes = num_classes
    h.backbone = dict(backbone="EfficientNetB%d" % phi,
                      convolution="depthwise_conv2d",
                      dropblock=None,
                      # dropblock=dict(keep_prob=None,
                      #                block_size=None)
                      normalization=dict(normalization="batch_norm",
                                         momentum=0.99,
                                         epsilon=1e-3,
                                         axis=-1,
                                         trainable=False),
                      activation=dict(activation="swish"),
                      strides=[2, 1, 2, 2, 2, 1, 2, 1],
                      dilation_rates=[1, 1, 1, 1, 1, 1, 1, 1],
                      output_indices=[3, 4, 5],
                      frozen_stages=[-1, ])
    h.neck = dict(neck=fpn_name,
                  input_size=image_size if isinstance(image_size, (list, tuple))
                  else [image_size, image_size],
                  num_backbone_levels=3,
                  feat_dims=fpn_filters,
                  repeats=neck_repeats,
                  convolution="separable_conv2d",
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.99,
                                     epsilon=1e-3,
                                     axis=-1,
                                     trainable=False),
                  activation=dict(activation="swish"),
                  min_level=3,
                  max_level=7,
                  input_dims=fpn_input_dims,
                  pool_type=None,
                  apply_bn=True,
                  fusion_type=fusion_type)

    # Anchor parameters: one stride per pyramid level, `num_scales` octave
    # sub-scales per level, 3 aspect ratios.
    strides = [2 ** l for l in range(min_level, max_level + 1)]
    scales = [[2 ** (i / num_scales) * s * anchor_scale
               for i in range(num_scales)] for s in strides]
    aspect_ratios = [1., 0.5, 2.]
    # Renamed from `num_scales` — the original rebinding shadowed the
    # function parameter of the same name.
    num_anchors = len(scales[0]) * len(aspect_ratios)
    h.anchors = dict(aspect_ratios=aspect_ratios,
                     strides=strides,
                     scales=scales,
                     num_anchors=num_anchors)

    h.head = dict(head="RetinaNetHead",
                  convolution="separable_conv2d",
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.99,
                                     epsilon=1e-3,
                                     axis=-1,
                                     trainable=False),
                  activation=dict(activation="swish"),
                  feat_dims=fpn_filters,
                  dropblock=None,
                  repeats=head_repeats,
                  min_level=min_level,
                  max_level=max_level,
                  use_sigmoid=True,
                  prior=0.01,
                  survival_prob=None,
                  data_format="channels_last",
                  bbox_encoder=dict(encoder="Box2Delta", weights=None),
                  bbox_decoder=dict(decoder="Delta2Box", weights=None),
                  assigner=dict(assigner="ATSSAssigner", topk=9),
                  sampler=dict(sampler="PseudoSampler"),
                  label_loss=dict(loss="FocalLoss",
                                  gamma=2.0,
                                  alpha=0.25,
                                  label_smoothing=0.01,
                                  weight=1.,
                                  from_logits=True,
                                  reduction="sum"),
                  bbox_loss=dict(loss="CIoULoss", weight=1., reduction="sum"))
    h.weight_decay = 4e-5

    # NOTE(review): pretrained path points at a resnet50 checkpoint while the
    # backbone is EfficientNet — confirm the intended weights.
    h.train = dict(
        input_size=(image_size, image_size),
        dataset=dict(dataset="COCODataset",
                     batch_size=batch_size,
                     dataset_dir="/data/bail/COCO",
                     training=True,
                     augmentations=[
                         dict(FlipLeftToRight=dict(probability=0.5)),
                         dict(RandomDistortColor=dict(probability=1.)),
                         dict(Resize=dict(size=(image_size, image_size),
                                          strides=32,
                                          min_scale=0.5,
                                          max_scale=2.0)),
                         # dict(ResizeV2=dict(short_side=800, long_side=1333, strides=64, min_scale=1.0, max_scale=1.0))
                     ],
                     num_samples=118287,
                     num_classes=num_classes),
        pretrained_weights_path="/data/bail/pretrained_weights/resnet50/resnet50.ckpt",
        optimizer=dict(optimizer="SGD", momentum=0.9),
        # The loss scale in mixed precision training. If None, use dynamic.
        mixed_precision=dict(loss_scale=None),
        gradient_clip_norm=.0,
        scheduler=dict(train_epochs=18,
                       # learning_rate_scheduler=dict(scheduler="PiecewiseConstantDecay",
                       #                              boundaries=[8, 11],
                       #                              values=[0.02, 0.002, 0.0002]),
                       learning_rate_scheduler=dict(scheduler="CosineDecay",
                                                    initial_learning_rate=0.02),
                       warmup=dict(warmup_learning_rate=0.0001, steps=800)),
        checkpoint_dir="checkpoints/efficientdet-d%d" % phi,
        summary_dir="logs/efficientdet-d%d" % phi,
        log_every_n_steps=100,
        save_ckpt_steps=5000)
    h.val = dict(
        dataset=dict(dataset="COCODataset",
                     batch_size=batch_size,
                     dataset_dir="/data/bail/COCO",
                     training=False,
                     augmentations=[
                         dict(Resize=dict(size=(image_size, image_size),
                                          strides=32,
                                          min_scale=1.0,
                                          max_scale=1.0))
                         # dict(ResizeV2=dict(short_side=800, long_side=1333, strides=64, min_scale=1.0, max_scale=1.0))
                     ]),
        input_size=(image_size, image_size),
        samples=5000)
    h.test = dict(
        nms="CombinedNonMaxSuppression",
        pre_nms_size=5000,  # select top_k high confident detections for nms
        post_nms_size=100,
        iou_threshold=0.6,
        score_threshold=0.1,
    )

    return h
def get_onenet_config(num_classes=80):
    """Build the OneNet (ResNet18 + CenterNetDeconv) detection config.

    Args:
        num_classes: number of object classes.

    Returns:
        A populated ``Config`` object.
    """
    h = Config()
    input_size = (512, 512)
    h.detector = "OneNet"
    h.dtype = "float16"
    h.data_format = "channels_last"
    h.input_shape = (input_size[0], input_size[1], 3)
    h.num_classes = num_classes
    h.backbone = dict(backbone="ResNet18",
                      dropblock=None,
                      normalization=dict(normalization="batch_norm",
                                         momentum=0.997,
                                         epsilon=1e-4,
                                         trainable=False),
                      activation=dict(activation="relu"),
                      strides=[2, 2, 2, 2, 2],
                      dilation_rates=[1, 1, 1, 1, 1],
                      output_indices=[2, 3, 4, 5],
                      frozen_stages=[1, ])
    h.neck = dict(neck="CenterNetDeconv",
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.997,
                                     epsilon=1e-4,
                                     trainable=False),
                  activation=dict(activation="relu"))
    h.head = dict(head="OneNetHead",
                  activation=dict(activation="relu"),
                  feat_dims=64,
                  dropblock=None,
                  num_classes=num_classes,
                  strides=4,
                  prior=0.01,
                  use_sigmoid=True,
                  assigner=dict(assigner="MinCostAssigner",
                                class_weight=2.,
                                l1_weight=2.,
                                iou_weight=5.,
                                iou_type="giou",
                                alpha=0.25,
                                gamma=2.),
                  label_loss=dict(loss="FocalLoss",
                                  alpha=0.25,
                                  gamma=2.,
                                  reduction="sum"),
                  bbox_loss=dict(loss="RegL1Loss", weight=1., reduction="sum"))
    h.weight_decay = 1e-4
    h.excluding_weight_names = ["predicted_box", "predicted_class"]

    # NOTE(review): pretrained path names resnet50 while the backbone is
    # ResNet18 — confirm the intended checkpoint.
    h.train = dict(
        dataset=dict(dataset="COCODataset",
                     batch_size=4,
                     dataset_dir="/data/bail/COCO",
                     training=True,
                     augmentations=[
                         dict(augmentation="FlipLeftToRight", probability=0.5),
                         dict(augmentation="RandomDistortColor"),
                         dict(augmentation="Resize",
                              img_scale=(0.2, 2),
                              multiscale_mode="range",
                              keep_ratio=True),
                         # FIX: `input_size` is already an (h, w) tuple; the
                         # original `size=(input_size, input_size)` produced a
                         # nested tuple ((512, 512), (512, 512)).
                         dict(augmentation="RandCropOrPad",
                              size=input_size,
                              clip_box_base_center=False),
                     ],
                     num_samples=118287),
        pretrained_weights_path="/data/bail/pretrained_weights/resnet50/resnet50.ckpt",
        optimizer=dict(optimizer="SGD", momentum=0.9),
        # The loss scale in mixed precision training. If None, use dynamic.
        mixed_precision=dict(loss_scale=None),
        gradient_clip_norm=10.0,
        scheduler=dict(train_epochs=24,
                       learning_rate_scheduler=dict(
                           scheduler="PiecewiseConstantDecay",
                           boundaries=[16, 22],
                           values=[0.02, 0.002, 0.0002]),
                       warmup=dict(warmup_learning_rate=0.001, steps=800)),
        checkpoint_dir="checkpoints/onenet",
        summary_dir="logs/onenet",
        log_every_n_steps=100,
        save_ckpt_steps=5000)
    h.val = dict(
        dataset=dict(dataset="COCODataset",
                     batch_size=4,
                     dataset_dir="/data/bail/COCO",
                     training=False,
                     augmentations=[
                         # NOTE(review): this embeds the (512, 512) tuple as
                         # the second element — other configs in this file use
                         # scalar pairs like (1333, 800); verify downstream.
                         dict(augmentation="Resize",
                              img_scale=[(1333, input_size)],
                              keep_ratio=True),
                         dict(augmentation="Pad", size_divisor=32)
                     ]),
        samples=5000)
    h.test = dict(topk=100, score_threshold=0.3)

    return h
def get_gfl_config(num_classes=80):
    """Return the GFL (Generalized Focal Loss, ResNet101-FPN) config.

    Args:
        num_classes: number of object classes.

    Returns:
        A populated ``Config`` object.
    """
    h = Config()
    input_size = (1024, 1024)

    # Top-level model identity.
    h.detector = "GFL"
    h.dtype = "float16"
    h.data_format = "channels_last"
    h.input_shape = (input_size[0], input_size[1], 3)
    h.num_classes = num_classes

    # ResNet101 backbone, stages C3-C5 exported, stage 1 frozen.
    h.backbone = dict(backbone="ResNet101",
                      dropblock=None,
                      normalization=dict(normalization="batch_norm",
                                         momentum=0.997,
                                         epsilon=1e-4,
                                         trainable=False),
                      activation=dict(activation="relu"),
                      strides=[2, 2, 2, 2, 2],
                      dilation_rates=[1, 1, 1, 1, 1],
                      output_indices=[3, 4, 5],
                      frozen_stages=[1, ])

    # FPN neck with extra convs extending the pyramid to 5 output levels.
    h.neck = dict(neck="FPN",
                  feat_dims=256,
                  min_level=3,
                  max_level=5,
                  num_output_levels=5,
                  add_extra_convs=True,
                  relu_before_extra_convs=True)

    # Single square anchor per location (ATSS-style).
    h.anchors = dict(generator="AnchorGeneratorV2",
                     aspect_ratios=[1.],
                     octave_base_scale=8,
                     scales_per_octave=1,
                     strides=[8, 16, 32, 64, 128],
                     num_anchors=1)

    # GFL head: QFL for classification, GIoU for regression, DFL for the
    # discretized box-distance distribution (reg_max bins).
    h.head = dict(head="GFLHead",
                  normalization=dict(normalization="group_norm", groups=32),
                  activation=dict(activation="relu"),
                  feat_dims=256,
                  dropblock=None,
                  num_classes=num_classes,
                  repeats=4,
                  min_level=3,
                  max_level=7,
                  use_sigmoid=True,
                  prior=0.01,
                  reg_max=16,
                  bbox_decoder=dict(decoder="Distance2Box", weights=None),
                  bbox_encoder=dict(encoder="Box2Distance", weights=None),
                  assigner=dict(assigner="ATSSAssigner", topk=9),
                  sampler=dict(sampler="PseudoSampler"),
                  label_loss=dict(loss="QualityFocalLoss",
                                  beta=2.0,
                                  weight=1.,
                                  from_logits=True,
                                  reduction="sum"),
                  bbox_loss=dict(loss="GIoULoss", weight=2., reduction="sum"),
                  dfl_loss=dict(loss="DistributionFocalLoss",
                                weight=.25,
                                reduction="sum"))
    h.weight_decay = 1e-4
    h.excluding_weight_names = ["predicted_box", "predicted_class"]

    # Training: COCO, 2x schedule (24 epochs) with step decay.
    h.train = dict(
        dataset=dict(dataset="COCODataset",
                     batch_size=4,
                     dataset_dir="/data/bail/COCO",
                     training=True,
                     augmentations=[
                         dict(augmentation="FlipLeftToRight", probability=0.5),
                         dict(augmentation="RandomDistortColor"),
                         dict(augmentation="Resize",
                              img_scale=[(1333, 800)],
                              keep_ratio=True),
                         dict(augmentation="Pad", size_divisor=32),
                     ],
                     num_samples=118287),
        pretrained_weights_path="/data/bail/pretrained_weights/resnet50/resnet50.ckpt",
        optimizer=dict(optimizer="SGD", momentum=0.9),
        # The loss scale in mixed precision training. If None, use dynamic.
        mixed_precision=dict(loss_scale=None),
        gradient_clip_norm=10.0,
        scheduler=dict(train_epochs=24,
                       learning_rate_scheduler=dict(
                           scheduler="PiecewiseConstantDecay",
                           boundaries=[16, 22],
                           values=[0.02, 0.002, 0.0002]),
                       warmup=dict(warmup_learning_rate=0.001, steps=800)),
        checkpoint_dir="checkpoints/gfl",
        summary_dir="logs/gfl",
        log_every_n_steps=100,
        save_ckpt_steps=5000)

    # Validation mirrors the training resize/pad without augmentation.
    h.val = dict(
        dataset=dict(dataset="COCODataset",
                     batch_size=4,
                     dataset_dir="/data/bail/COCO",
                     training=False,
                     augmentations=[
                         dict(augmentation="Resize",
                              img_scale=[(1333, 800)],
                              keep_ratio=True),
                         dict(augmentation="Pad", size_divisor=32),
                     ]),
        samples=5000)

    # Inference-time NMS settings.
    h.test = dict(nms="CombinedNonMaxSuppression",
                  pre_nms_size=5000,
                  post_nms_size=100,
                  iou_threshold=0.6,
                  score_threshold=0.3)

    return h
def get_faster_rcnn_config(num_classes=80):
    """Return a 3-stage cascade R-CNN style config (detector "FasterRCNN").

    NOTE(review): a second ``get_faster_rcnn_config`` is defined later in
    this file and shadows this one at import time — confirm which is intended.

    Args:
        num_classes: number of object classes.

    Returns:
        A populated ``Config`` object.
    """
    h = Config()
    h.detector = "FasterRCNN"
    h.dtype = "float16"
    h.num_classes = num_classes

    # ResNet50-V1D backbone exporting C2-C5.
    h.backbone = dict(backbone="ResNet50V1D",
                      convolution="conv2d",
                      dropblock=None,
                      normalization=dict(normalization="batch_norm",
                                         momentum=0.997,
                                         epsilon=1e-4,
                                         trainable=False),
                      activation=dict(activation="relu"),
                      strides=[2, 2, 2, 2, 2],
                      dilation_rates=[1, 1, 1, 1, 1],
                      output_indices=[2, 3, 4, 5],
                      frozen_stages=[-1, ])
    h.neck = dict(neck="FPN",
                  convolution="conv2d",
                  feat_dims=256,
                  input_dims=[256, 512, 1024, 2048],
                  min_level=2,
                  max_level=6,
                  add_extra_convs=False)

    # One scale x three aspect ratios per FPN level.
    h.anchors = dict(scales=[[32], [64], [128], [256], [512]],
                     aspect_ratios=[0.5, 1., 2.0],
                     strides=[4, 8, 16, 32, 64],
                     num_anchors=3)

    # First stage: class-agnostic RPN with softmax (2-class) objectness.
    h.rpn_head = dict(head="RPNHead",
                      convolution="conv2d",
                      normalization=None,
                      activation=dict(activation="relu"),
                      feat_dims=256,
                      dropblock=None,
                      num_classes=2,
                      min_level=2,
                      max_level=6,
                      use_sigmoid=False,
                      bbox_encoder=dict(encoder="Box2Delta", weights=None),
                      bbox_decoder=dict(decoder="Delta2Box", weights=None),
                      assigner=dict(assigner="MaxIoUAssigner",
                                    pos_iou_thresh=0.7,
                                    neg_iou_thresh=0.3),
                      sampler=dict(sampler="RandomSampler",
                                   num_proposals=256,
                                   pos_fraction=0.5,
                                   add_gt_as_proposals=False),
                      label_loss=dict(loss="CrossEntropy",
                                      label_smoothing=0.01,
                                      weight=1.,
                                      from_logits=True,
                                      reduction="none"),
                      bbox_loss=dict(loss="SmoothL1Loss",
                                     delta=1. / 9.,
                                     weight=1.,
                                     reduction="none"))

    # Three cascaded RoI heads with progressively stricter IoU thresholds
    # (0.5 -> 0.6 -> 0.7) and progressively tighter delta weights.
    h.roi_head = dict(
        num_stages=3,
        roi_pooling=dict(roi_pooling="MultiLevelAlignedRoIPooling",
                         cropped_size=7,
                         strides=(4, 8, 16, 32, 64)),
        bbox_head=[
            dict(head="Shared2FCRCNNHead",
                 activation=dict(activation="relu"),
                 dropblock=None,
                 fc_dims=1024,
                 num_classes=num_classes,
                 bbox_encoder=dict(encoder="Box2Delta",
                                   weights=[10., 10., 5., 5.]),
                 bbox_decoder=dict(decoder="Delta2Box",
                                   weights=[10., 10., 5., 5.]),
                 assigner=dict(assigner="MaxIoUAssigner",
                               pos_iou_thresh=0.5,
                               neg_iou_thresh=0.5, ),
                 sampler=dict(sampler="RandomSampler",
                              num_proposals=512,
                              pos_fraction=0.25,
                              add_gt_as_proposals=True),
                 label_loss=dict(loss="CrossEntropy",
                                 label_smoothing=0.0,
                                 weight=1.,
                                 from_logits=True,
                                 reduction="none"),
                 bbox_loss=dict(loss="SmoothL1Loss",
                                delta=1.,
                                weight=1.,
                                reduction="none"),
                 use_sigmoid=False,
                 reg_class_agnostic=True),
            dict(head="Shared2FCRCNNHead",
                 activation=dict(activation="relu"),
                 dropblock=None,
                 fc_dims=1024,
                 num_classes=num_classes,
                 bbox_encoder=dict(encoder="Box2Delta",
                                   weights=[20., 20., 10., 10.]),
                 bbox_decoder=dict(decoder="Delta2Box",
                                   weights=[20., 20., 10., 10.]),
                 assigner=dict(assigner="MaxIoUAssigner",
                               pos_iou_thresh=0.6,
                               neg_iou_thresh=0.6, ),
                 sampler=dict(sampler="RandomSampler",
                              num_proposals=512,
                              pos_fraction=0.25,
                              add_gt_as_proposals=True),
                 label_loss=dict(loss="CrossEntropy",
                                 label_smoothing=0.01,
                                 weight=1.,
                                 from_logits=True,
                                 reduction="none"),
                 bbox_loss=dict(loss="SmoothL1Loss",
                                delta=1.,
                                weight=1.,
                                reduction="none"),
                 use_sigmoid=False,
                 reg_class_agnostic=True),
            dict(head="Shared2FCRCNNHead",
                 activation=dict(activation="relu"),
                 dropblock=None,
                 fc_dims=1024,
                 num_classes=num_classes,
                 bbox_encoder=dict(encoder="Box2Delta",
                                   weights=[1. / 0.033, 1. / 0.022,
                                            1. / 0.067, 1. / 0.067]),
                 bbox_decoder=dict(decoder="Delta2Box",
                                   weights=[1. / 0.033, 1. / 0.022,
                                            1. / 0.067, 1. / 0.067]),
                 assigner=dict(assigner="MaxIoUAssigner",
                               pos_iou_thresh=0.7,
                               neg_iou_thresh=0.7),
                 sampler=dict(sampler="RandomSampler",
                              num_proposals=512,
                              pos_fraction=0.25,
                              add_gt_as_proposals=True),
                 label_loss=dict(loss="CrossEntropy",
                                 label_smoothing=0.01,
                                 weight=1.,
                                 from_logits=True,
                                 reduction="none"),
                 bbox_loss=dict(loss="SmoothL1Loss",
                                delta=1.,
                                weight=1.,
                                reduction="none"),
                 use_sigmoid=False,
                 reg_class_agnostic=True)
        ])
    h.weight_decay = 1e-4

    # Training: fixed 1024x1024 input, 1x schedule (12 epochs).
    h.train = dict(
        proposal_layer=dict(pre_nms_size=12000,
                            post_nms_size=2000,
                            max_total_size=2000,
                            iou_threshold=0.7,
                            min_size=0),
        input_size=(1024, 1024),
        dataset=dict(dataset="COCODataset",
                     batch_size=2,
                     dataset_dir="/data/bail/COCO",
                     training=True,
                     augmentations=[
                         dict(FlipLeftToRight=dict(probability=0.5)),
                         dict(RandomDistortColor=dict(probability=1.)),
                         dict(Resize=dict(size=(1024, 1024),
                                          strides=128,
                                          min_scale=1.0,
                                          max_scale=1.0)),
                     ],
                     num_samples=118287,
                     num_classes=num_classes),
        pretrained_weights_path="/data/bail/pretrained_weights/resnet50_v1d.h5",
        optimizer=dict(optimizer="SGD", momentum=0.9),
        # The loss scale in mixed precision training. If None, use dynamic.
        mixed_precision=dict(loss_scale=None),
        gradient_clip_norm=10.0,
        scheduler=dict(train_epochs=12,
                       learning_rate_scheduler=dict(
                           scheduler="PiecewiseConstantDecay",
                           boundaries=[8, 11],
                           values=[0.02, 0.002, 0.0002]),
                       warmup=dict(warmup_learning_rate=0.0001, steps=8000)),
        checkpoint_dir="checkpoints/faster_rcnn",
        summary_dir="logs/faster_rcnn",
        log_every_n_steps=100,
        save_ckpt_steps=5000)
    h.val = dict(
        dataset=dict(dataset="COCODataset",
                     batch_size=2,
                     dataset_dir="/data/bail/COCO",
                     training=False,
                     augmentations=[
                         dict(Resize=dict(size=(1024, 1024),
                                          strides=128,
                                          min_scale=1.0,
                                          max_scale=1.0))
                     ]),
        input_size=(1024, 1024),
        samples=5000,
        val_every_n_steps=250)

    # Inference: smaller proposal budget, final class-aware NMS.
    h.test = dict(
        proposal_layer=dict(pre_nms_size=6000,
                            post_nms_size=1000,
                            max_total_size=1000,
                            iou_threshold=0.7,
                            min_size=0),
        pre_nms_size=1000,  # select top_k high confident detections for nms
        post_nms_size=100,
        iou_threshold=0.5,
        score_threshold=0.05)

    return h
def get_yolof_config(num_classes=80):
    """Return the YOLOF (single C5 level + dilated encoder) config.

    Args:
        num_classes: number of object classes.

    Returns:
        A populated ``Config`` object.
    """
    h = Config()
    data_format = "channels_last"
    input_size = (1024, 1024)

    h.detector = "YOLOF"
    h.dtype = "float16"
    h.data_format = data_format
    h.input_shape = (input_size[0], input_size[1], 3)
    h.num_classes = num_classes

    # ResNeXt backbone exporting only C5 (YOLOF uses a single feature level).
    h.backbone = dict(backbone="ResNeXt101_64X4D",
                      dropblock=None,
                      normalization=dict(normalization="batch_norm",
                                         momentum=0.997,
                                         epsilon=1e-4,
                                         trainable=False),
                      activation=dict(activation="relu"),
                      strides=[2, 2, 2, 2, 2],
                      dilation_rates=[1, 1, 1, 1, 1],
                      output_indices=[5],
                      frozen_stages=[1, ])

    # Dilated encoder widens the receptive field on the single C5 map.
    h.neck = dict(neck="DilatedEncoder",
                  filters=512,
                  midfilters=128,
                  dilation_rates=[2, 4, 6, 8],  # dilation not in stage5
                  # dilation_rates=[4, 8, 12, 16],
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.9,
                                     epsilon=1e-3,
                                     axis=-1,
                                     trainable=True),
                  activation=dict(activation="relu"),
                  kernel_initializer="he_normal",
                  data_format=data_format)

    # Five square anchors of different scales on the single stride-32 level.
    h.anchors = dict(generator="AnchorGenerator",
                     aspect_ratios=[1.],
                     scales=[32, 64, 128, 256, 512],
                     strides=32,
                     # scales=[16, 32, 64, 128, 256, 512],
                     # strides=16,
                     num_anchors=5)

    h.head = dict(head="YOLOFHead",
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.9,
                                     epsilon=1e-3,
                                     axis=-1,
                                     trainable=True),
                  activation=dict(activation="relu"),
                  feat_dims=512,
                  cls_num_convs=2,
                  reg_num_convs=4,
                  kernel_initializer="he_normal",
                  use_sigmoid=True,
                  prior=0.01,
                  bbox_decoder=dict(decoder="Delta2Box",
                                    weights=[1., 1., 1., 1.]),
                  bbox_encoder=dict(encoder="Box2Delta",
                                    weights=[1., 1., 1., 1.]),
                  assigner=dict(assigner="UniformAssigner",
                                match_times=8,
                                pos_ignore_thresh=0.7,
                                neg_ignore_thresh=0.15),
                  sampler=dict(sampler="PseudoSampler"),
                  label_loss=dict(loss="FocalLoss",
                                  alpha=0.25,
                                  gamma=2.0,
                                  weight=1.,
                                  from_logits=True,
                                  reduction="sum"),
                  bbox_loss=dict(loss="GIoULoss", weight=2., reduction="sum"))
    h.weight_decay = 1e-4
    h.excluding_weight_names = ["predicted_box", "predicted_class"]

    # Training: COCO, 2x schedule (24 epochs) with step decay.
    h.train = dict(
        dataset=dict(dataset="COCODataset",
                     batch_size=4,
                     dataset_dir="/data/bail/COCO",
                     training=True,
                     augmentations=[
                         dict(augmentation="FlipLeftToRight", probability=0.5),
                         dict(augmentation="RandomDistortColor"),
                         dict(augmentation="Resize",
                              img_scale=[(1333, 800)],
                              keep_ratio=True),
                         dict(augmentation="Pad", size_divisor=32)
                     ],
                     num_samples=118287),
        pretrained_weights_path="/data/bail/pretrained_weights/resnet50/resnet50.ckpt",
        optimizer=dict(optimizer="SGD", momentum=0.9),
        # The loss scale in mixed precision training. If None, use dynamic.
        mixed_precision=dict(loss_scale=None),
        gradient_clip_norm=10.0,
        scheduler=dict(train_epochs=24,
                       learning_rate_scheduler=dict(
                           scheduler="PiecewiseConstantDecay",
                           boundaries=[16, 22],
                           values=[0.02, 0.002, 0.0002]),
                       warmup=dict(warmup_learning_rate=0.001, steps=800)),
        checkpoint_dir="checkpoints/yolof",
        summary_dir="logs/yolof",
        log_every_n_steps=100,
        save_ckpt_steps=5000)
    h.val = dict(
        dataset=dict(dataset="COCODataset",
                     batch_size=4,
                     dataset_dir="/data/bail/COCO",
                     training=False,
                     augmentations=[
                         dict(augmentation="Resize",
                              img_scale=[(1333, 800)],
                              keep_ratio=True),
                         dict(augmentation="Pad", size_divisor=32),
                     ]),
        samples=5000)
    h.test = dict(nms="CombinedNonMaxSuppression",
                  pre_nms_size=2000,
                  post_nms_size=100,
                  iou_threshold=0.5,
                  score_threshold=0.35)

    return h
def default_detection_configs(phi,
                              min_level=3,
                              max_level=7,
                              fpn_filters=64,
                              neck_repeats=3,
                              head_repeats=3,
                              anchor_scale=4,
                              num_scales=3,
                              batch_size=4,
                              image_size=512,
                              fusion_type="weighted_sum"):
    """Build the flat-style EfficientDet-D`phi` config (Objects365 variant).

    NOTE(review): this redefines ``default_detection_configs`` from earlier
    in this file and shadows it at import time — confirm which is intended.

    Args:
        phi: EfficientDet compound-scaling coefficient.
        min_level, max_level: pyramid levels covered by the anchors.
        fpn_filters: channel width of neck and head.
        neck_repeats, head_repeats: repeat counts for BiFPN / head convs.
        anchor_scale: base anchor scale multiplier.
        num_scales: anchor scales per pyramid level.
        batch_size: per-step batch size.
        image_size: square input resolution.
        fusion_type: BiFPN feature-fusion mode.

    Returns:
        A populated ``Config`` object.
    """
    h = Config()
    # model name
    h.detector = "efficientdet-d%d" % phi
    h.min_level = min_level
    h.max_level = max_level
    h.dtype = "float16"

    # backbone
    h.backbone = dict(backbone="efficientnet-b%d" % phi,
                      convolution="depthwise_conv2d",
                      dropblock=None,
                      # dropblock=dict(keep_prob=None,
                      #                block_size=None)
                      normalization=dict(normalization="batch_norm",
                                         momentum=0.99,
                                         epsilon=1e-3,
                                         axis=-1,
                                         trainable=False),
                      activation=dict(activation="swish"),
                      strides=[2, 1, 2, 2, 2, 1, 2, 1],
                      dilation_rates=[1, 1, 1, 1, 1, 1, 1, 1],
                      output_indices=[3, 4, 5],
                      frozen_stages=[-1])

    # neck
    h.neck = dict(neck="bifpn",
                  repeats=neck_repeats,
                  convolution="separable_conv2d",
                  dropblock=None,
                  # dropblock=dict(keep_prob=None,
                  #                block_size=None)
                  feat_dims=fpn_filters,
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.99,
                                     epsilon=1e-3,
                                     axis=-1,
                                     trainable=False),
                  activation=dict(activation="swish"),
                  add_extra_conv=False,  # Add extra convolution for neck
                  fusion_type=fusion_type,
                  use_multiplication=False)

    # head
    h.head = dict(head="RetinaNetHead",
                  repeats=head_repeats,
                  convolution="separable_conv2d",
                  dropblock=None,
                  # dropblock=dict(keep_prob=None,
                  #                block_size=None)
                  feat_dims=fpn_filters,
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.99,
                                     epsilon=1e-3,
                                     axis=-1,
                                     trainable=False),
                  activation=dict(activation="swish"),
                  prior=0.01)

    # anchors parameters
    strides = [2 ** l for l in range(min_level, max_level + 1)]
    h.anchor = dict(aspect_ratios=[[1., 0.5, 2.]] * (max_level - min_level + 1),
                    scales=[[2 ** (i / num_scales) * s * anchor_scale
                             for i in range(num_scales)] for s in strides],
                    # FIX: was hard-coded 9, which is only correct for the
                    # default num_scales=3; 3 aspect ratios x num_scales.
                    num_anchors=3 * num_scales)

    # assigner
    h.assigner = dict(assigner="max_iou_assigner",
                      pos_iou_thresh=0.5,
                      neg_iou_thresh=0.5)
    # sampler
    h.sampler = dict(sampler="pseudo_sampler")

    # loss
    h.use_sigmoid = True
    h.label_loss = dict(loss="focal_loss",
                        alpha=0.25,
                        gamma=1.5,
                        label_smoothing=0.,
                        weight=1.,
                        from_logits=True,
                        reduction="none")
    h.bbox_loss = dict(loss="smooth_l1_loss",
                       weight=50.,  # 50.
                       delta=.1,  # .1
                       reduction="none")
    # h.box_loss=dict(loss="giou_loss",
    #                 weight=10.,
    #                 reduction="none")
    h.weight_decay = 4e-5
    h.bbox_mean = None  # [0., 0., 0., 0.]
    h.bbox_std = None  # [0.1, 0.1, 0.2, 0.2]

    # dataset
    h.num_classes = 90
    h.skip_crowd_during_training = True
    h.dataset = "objects365"
    h.batch_size = batch_size
    h.input_size = [image_size, image_size]
    h.train_dataset_dir = "/home/bail/Data/data1/Dataset/Objects365/train"
    # NOTE(review): val dir points at the train split — confirm intended.
    h.val_dataset_dir = "/home/bail/Data/data1/Dataset/Objects365/train"
    h.augmentation = [
        dict(ssd_crop=dict(patch_area_range=(0.3, 1.),
                           aspect_ratio_range=(0.5, 2.0),
                           min_overlaps=(0.1, 0.3, 0.5, 0.7, 0.9),
                           max_attempts=100,
                           probability=.5)),
        # dict(data_anchor_sampling=dict(anchor_scales=(16, 32, 64, 128, 256, 512),
        #                                overlap_threshold=0.7,
        #                                max_attempts=50,
        #                                probability=.5)),
        dict(flip_left_to_right=dict(probability=0.5)),
        dict(random_distort_color=dict(probability=1.))
    ]

    # train
    h.pretrained_weights_path = "/home/bail/Workspace/pretrained_weights/efficientdet-d%d" % phi
    h.optimizer = dict(optimizer="sgd", momentum=0.9)
    h.lookahead = None
    h.train_steps = 240000
    h.learning_rate_scheduler = dict(scheduler="cosine",
                                     initial_learning_rate=0.002)
    h.warmup = dict(warmup_learning_rate=0.00001, steps=24000)
    h.checkpoint_dir = "checkpoints/efficientdet_d%d" % phi
    h.summary_dir = "logs/efficientdet_d%d" % phi
    h.gradient_clip_norm = .0
    h.log_every_n_steps = 500
    h.save_ckpt_steps = 10000
    h.val_every_n_steps = 4000

    h.postprocess = dict(
        pre_nms_size=5000,  # select top_k high confident detections for nms
        post_nms_size=100,
        iou_threshold=0.5,
        score_threshold=0.2)

    return h
def get_faster_rcnn_config(num_classes=80):
    """Return the standard Faster R-CNN (CaffeResNet50 + FPN) config.

    NOTE(review): this redefines ``get_faster_rcnn_config`` from earlier in
    this file and shadows it at import time — confirm which is intended.

    Args:
        num_classes: number of object classes.

    Returns:
        A populated ``Config`` object.
    """
    h = Config()
    h.detector = "FasterRCNN"
    h.dtype = "float16"
    h.data_format = "channels_last"
    h.num_classes = num_classes

    # Caffe-style ResNet50 backbone exporting C2-C5, BN trainable.
    h.backbone = dict(backbone="CaffeResNet50",
                      dropblock=None,
                      normalization=dict(normalization="batch_norm",
                                         momentum=0.997,
                                         epsilon=1e-4,
                                         trainable=True),
                      activation=dict(activation="relu"),
                      strides=[2, 2, 2, 2, 2],
                      dilation_rates=[1, 1, 1, 1, 1],
                      output_indices=[2, 3, 4, 5],
                      frozen_stages=[-1, ])
    h.neck = dict(neck="FPN",
                  feat_dims=256,
                  min_level=2,
                  max_level=5,
                  num_output_levels=5,
                  add_extra_convs=False)

    # One scale x three aspect ratios per FPN level.
    h.anchors = dict(generator="AnchorGenerator",
                     scales=[[32], [64], [128], [256], [512]],
                     aspect_ratios=[0.5, 1., 2.0],
                     strides=[4, 8, 16, 32, 64],
                     num_anchors=3)

    # RPN with sigmoid objectness and its own train/test proposal budgets.
    h.rpn_head = dict(head="RPNHead",
                      normalization=None,
                      activation=dict(activation="relu"),
                      feat_dims=256,
                      dropblock=None,
                      num_classes=1,
                      min_level=2,
                      max_level=6,
                      use_sigmoid=True,
                      train_proposal=dict(pre_nms_size=2000,
                                          post_nms_size=1000,
                                          iou_threshold=0.7,
                                          min_box_size=0.),
                      test_proposal=dict(pre_nms_size=1000,
                                         post_nms_size=1000,
                                         iou_threshold=0.7,
                                         min_box_size=0.1),
                      bbox_encoder=dict(encoder="Box2Delta", weights=None),
                      bbox_decoder=dict(decoder="Delta2Box", weights=None),
                      assigner=dict(assigner="MaxIoUAssigner",
                                    pos_iou_thresh=0.7,
                                    neg_iou_thresh=0.3,
                                    min_pos_iou=0.3),
                      sampler=dict(sampler="RandomSampler",
                                   num_proposals=256,
                                   pos_fraction=0.5,
                                   add_gt_as_proposals=False),
                      label_loss=dict(loss="CrossEntropy",
                                      label_smoothing=0.0,
                                      weight=1.,
                                      from_logits=True,
                                      reduction="none"),
                      bbox_loss=dict(loss="SmoothL1Loss",
                                     delta=1. / 9.,
                                     weight=1.,
                                     reduction="none"))

    # Single-stage RoI head: RoIAlign 7x7, two shared FC layers.
    h.roi_head = dict(head="StandardRoIHead",
                      bbox_head=dict(roi_pooling=dict(
                          roi_pooling="MultiLevelAlignedRoIPooling",
                          pooled_size=7,
                          feat_dims=256),
                                     normalization=None,
                                     activation=dict(activation="relu"),
                                     dropblock=None,
                                     num_convs=0,
                                     conv_dims=256,
                                     num_fc=2,
                                     fc_dims=1024,
                                     feat_dims=256),
                      min_level=2,
                      max_level=5,
                      class_agnostic=False,
                      use_sigmoid=False,
                      bbox_encoder=dict(encoder="Box2Delta",
                                        weights=[10., 10., 5., 5.]),
                      bbox_decoder=dict(decoder="Delta2Box",
                                        weights=[10., 10., 5., 5.]),
                      assigner=dict(assigner="MaxIoUAssigner",
                                    pos_iou_thresh=0.5,
                                    neg_iou_thresh=0.5,
                                    min_pos_iou=0.5),
                      sampler=dict(sampler="RandomSampler",
                                   num_proposals=512,
                                   pos_fraction=0.25,
                                   add_gt_as_proposals=True),
                      label_loss=dict(loss="CrossEntropy",
                                      label_smoothing=0.0,
                                      weight=1.,
                                      from_logits=True,
                                      reduction="none"),
                      bbox_loss=dict(loss="SmoothL1Loss",
                                     delta=1.,
                                     weight=1.,
                                     reduction="none"),
                      reg_class_agnostic=True)
    h.weight_decay = 1e-4

    # Training: COCO, 1x schedule (12 epochs) with step decay.
    h.train = dict(
        dataset=dict(dataset="COCODataset",
                     batch_size=2,
                     dataset_dir="/data/bail/COCO",
                     training=True,
                     augmentations=[
                         dict(augmentation="FlipLeftToRight", probability=0.5),
                         dict(augmentation="RandomDistortColor"),
                         dict(augmentation="Resize",
                              img_scale=[(1333, 800)],
                              keep_ratio=True),
                         dict(augmentation="Pad", size_divisor=32),
                     ],
                     num_samples=118287,
                     num_classes=num_classes),
        pretrained_weights_path="/data/bail/pretrained_weights/resnet50/resnet50.ckpt",
        optimizer=dict(optimizer="SGD", momentum=0.9),
        # The loss scale in mixed precision training. If None, use dynamic.
        mixed_precision=dict(loss_scale=None),
        gradient_clip_norm=10.0,
        scheduler=dict(train_epochs=12,
                       learning_rate_scheduler=dict(
                           scheduler="PiecewiseConstantDecay",
                           boundaries=[8, 11],
                           values=[0.01, 0.001, 0.0001]),
                       warmup=dict(warmup_learning_rate=0.00001, steps=2000)),
        checkpoint_dir="checkpoints/faster_rcnn",
        summary_dir="logs/faster_rcnn",
        log_every_n_steps=100,
        save_ckpt_steps=5000)
    h.val = dict(
        dataset=dict(dataset="COCODataset",
                     batch_size=1,
                     dataset_dir="/data/bail/COCO",
                     training=False,
                     augmentations=[
                         dict(augmentation="Resize",
                              img_scale=[(1333, 800)],
                              keep_ratio=True),
                         dict(augmentation="Pad", size_divisor=32),
                     ]),
        input_size=(1024, 1024),
        samples=5000)
    h.test = dict(nms="CombinedNonMaxSuppression",
                  pre_nms_size=5000,
                  post_nms_size=100,
                  iou_threshold=0.6,
                  score_threshold=0.5)

    return h