Code Example #1
File: yolov4_config.py  Project: wavce/letsdet
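The function below builds a YOLOv4 training/eval configuration as an attribute bag. It relies on a Config container and several module-level lookup tables (default_input_sizes, depth_multiplier_dict, width_multiplier_dict, model, min_level_dict, max_level_dict, strides_dict, anchors_dict) that are defined elsewhere in the project and not shown here. A minimal stand-in for Config, sufficient to run the snippet, could look like this (hypothetical, not the project's actual class):

class Config:
    """Hypothetical stand-in: a bare object that accepts arbitrary
    attribute assignment, which is all the function below requires."""
    pass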
def get_yolov4_config(input_size=None,
                      num_classes=80, 
                      num_anchors=3,
                      depth_multiplier=1.,
                      width_multiplier=1.,
                      label_assignment="iou", 
                      name="YOLOv4"):
    h = Config()
    
    # Validate the model name up front so an unknown name raises the
    # informative ValueError rather than a bare KeyError from the lookups.
    if name not in model:
        raise ValueError("%s not in %s" % (name, list(model.keys())))

    if input_size is None:
        input_size = default_input_sizes[name]
    h.detector = name
    h.dtype = "float16"
    h.data_format = "channels_last"
    h.num_classes = num_classes

    # NOTE: the per-model tables take precedence, so the depth_multiplier
    # and width_multiplier arguments are effectively unused.
    h.depth_multiplier = depth_multiplier_dict[name]
    h.width_multiplier = width_multiplier_dict[name]
    
    h.model = model[name]
    h.min_level = min_level_dict[name]
    h.max_level = max_level_dict[name]
    h.strides = strides_dict[name]
    h.anchors = anchors_dict[name]
    h.num_anchors = len(anchors_dict[name][0]) // 2  # anchors are a flat [w0, h0, w1, h1, ...] list per level; this overrides the num_anchors argument
    h.input_size = input_size if isinstance(input_size, (tuple, list)) else (input_size, input_size)

    h.label_assignment = label_assignment
    h.anchor_threshold = 0.2
    h.gr = 1.  # presumably the objectness/IoU blending ratio, as in YOLOv5-style losses
        
    h.bbox_loss = dict(loss="CIoULoss", weight=1., reduction="none")
    h.label_loss = dict(loss="BinaryCrossEntropy", weight=1., from_logits=True, reduction="none")  # 0.631 when fine-tuning, else 1.0
    h.conf_loss = dict(loss="BinaryCrossEntropy", weight=1., from_logits=True, reduction="none")   # 0.911 when fine-tuning, else 1.0
    h.balance = [1., 1., 1.]  # [4.0, 1.0, 0.4] if there are 3 levels, else [4.0, 1.0, 0.4, 0.1]
    h.box_weight = 0.05   # 0.0296 when fine-tuning, else 0.05
    h.label_weight = 0.5  # 0.243 when fine-tuning, else 0.5
    h.conf_weight = 1.0   # 0.301 when fine-tuning, else 1.0
    
    h.weight_decay = 0.0005
    h.excluding_weight_names = ["predicted"]
    h.train=dict(dataset=dict(dataset="COCODataset",
                              batch_size=8,
                              dataset_dir="/data/bail/COCO",
                              training=True,
                              augmentations=[
                                  dict(augmentation="FlipLeftToRight", probability=0.5),
                                  dict(augmentation="RandomDistortColor"),
                                  dict(augmentation="Resize", img_scale=(0.2, 2), keep_ratio=True),
                                  dict(augmentation="Pad", size_divisor=32)
                              ],
                            #   mixup=dict(alpha=8.0, prob=0.5),
                              mosaic=dict(size=input_size, min_image_scale=0.25, prob=1.),
                              num_samples=118287),
                  pretrained_weights_path="/data/bail/pretrained_weights/darknet53-notop/darknet53.ckpt",
                  optimizer=dict(optimizer="SGD", momentum=0.937),
                  mixed_precision=dict(loss_scale=None),  # The loss scale in mixed precision training. If None, use dynamic.
                  gradient_clip_norm=.0,

                  scheduler=dict(train_epochs=480,
                                 #  learning_rate_scheduler=dict(scheduler="PiecewiseConstantDecay",
                                 #                               boundaries=[24, 32],
                                 #                               values=[0.012, 0.0012, 0.00012]),
                                 learning_rate_scheduler=dict(scheduler="CosineDecay", initial_learning_rate=0.012),
                                 warmup=dict(warmup_learning_rate=0.0012, steps=12000)),
                  checkpoint_dir="checkpoints/%s" % name,
                  summary_dir="logs/%s" % name,
                  log_every_n_steps=100,
                  save_ckpt_steps=10000)
    h.val=dict(dataset=dict(dataset="COCODataset", 
                            batch_size=8,  
                            dataset_dir="/data/bail/COCO", 
                            training=False, 
                            augmentations=[
                                dict(Resize=dict(size=(input_size, input_size), strides=32, min_scale=1.0, max_scale=1.0))  # NOTE: assumes a scalar input_size; a tuple argument would be nested here
                                # dict(ResizeV2=dict(short_side=800, long_side=1333, strides=64, min_scale=1.0, max_scale=1.0))
                            ]),
               samples=5000)
    # h.test=dict(nms="NonMaxSuppressionWithQuality",
    #             pre_nms_size=5000,
    #             post_nms_size=100, 
    #             iou_threshold=0.6, 
    #             score_threshold=0.5,
    #             sigma=0.5,
    #             nms_type="nms")
    h.test=dict(nms="CombinedNonMaxSuppression",
                pre_nms_size=2000,
                post_nms_size=100, 
                iou_threshold=0.6, 
                score_threshold=0.35)

    return h
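A minimal usage sketch, assuming the lookup tables above contain a "YOLOv4" entry (the argument values here are illustrative, not from the project):

# Hypothetical usage of get_yolov4_config.
cfg = get_yolov4_config(input_size=608, num_classes=80)
print(cfg.detector)                        # "YOLOv4"
print(cfg.input_size)                      # (608, 608)
print(cfg.train["dataset"]["batch_size"])  # 8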
Code Example #2
def default_detection_configs(phi, 
                              min_level=3, 
                              max_level=7, 
                              fpn_filters=64,
                              neck_repeats=3,
                              head_repeats=3,
                              anchor_scale=4,
                              num_scales=3,
                              batch_size=4,
                              image_size=512,
                              fusion_type="weighted_sum"):
    h = Config()

    # model name
    h.detector = "efficientdet-d%d" % phi
    h.min_level = min_level
    h.max_level = max_level
    h.dtype = "float16"

    # backbone
    h.backbone = dict(backbone="efficientnet-b%d" % phi,
                      convolution="depthwise_conv2d",
                      dropblock=None,
                    #   dropblock=dict(keep_prob=None,
                    #                  block_size=None)
                      normalization=dict(normalization="batch_norm",
                                         momentum=0.99,
                                         epsilon=1e-3,
                                         axis=-1,
                                         trainable=False),
                      activation=dict(activation="swish"),
                      strides=[2, 1, 2, 2, 2, 1, 2, 1],
                      dilation_rates=[1, 1, 1, 1, 1, 1, 1, 1],
                      output_indices=[3, 4, 5],
                      frozen_stages=[-1])
    
    # neck
    h.neck = dict(neck="bifpn",
                  repeats=neck_repeats,
                  convolution="separable_conv2d",
                  dropblock=None,
                #   dropblock=dict(keep_prob=None,
                #                  block_size=None)
                  feat_dims=fpn_filters,
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.99,
                                     epsilon=1e-3,
                                     axis=-1,
                                     trainable=False),
                  activation=dict(activation="swish"),
                  add_extra_conv=False,  # Add extra convolution for neck
                  fusion_type=fusion_type, 
                  use_multiplication=False)
    
    # head
    h.head = dict(head="RetinaNetHead",
                  repeats=head_repeats,
                  convolution="separable_conv2d",
                  dropblock=None,
                #   dropblock=dict(keep_prob=None,
                #                  block_size=None)
                  feat_dims=fpn_filters,
                  normalization=dict(normalization="batch_norm",
                                     momentum=0.99,
                                     epsilon=1e-3,
                                     axis=-1,
                                     trainable=False),
                  activation=dict(activation="swish"),
                  prior=0.01)
    
    # anchors parameters
    strides = [2 ** l for l in range(min_level, max_level + 1)]  # feature strides per level, e.g. 8, 16, 32, 64, 128 for levels 3..7
    h.anchor = dict(aspect_ratios=[[1., 0.5, 2.]] * (max_level - min_level + 1),
                    scales=[
                        [2 ** (i / num_scales) * s * anchor_scale 
                        for i in range(num_scales)] for s in strides
                    ],
                    num_anchors=9)
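    # Worked example of the scale formula above, assuming the defaults
    # (min_level=3, anchor_scale=4, num_scales=3): the level-3 stride is
    # 2 ** 3 = 8, so its scales are 8 * 4 * 2 ** (i / 3) ~= [32.0, 40.3, 50.8].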

    # assigner
    h.assigner = dict(assigner="max_iou_assigner",
                      pos_iou_thresh=0.5,
                      neg_iou_thresh=0.5)
    # sampler
    h.sampler = dict(sampler="pseudo_sampler")
    
    # loss
    h.use_sigmoid = True
    h.label_loss = dict(loss="focal_loss",
                        alpha=0.25,
                        gamma=1.5,
                        label_smoothing=0.,
                        weight=1.,
                        from_logits=True,
                        reduction="none")
    h.bbox_loss = dict(loss="smooth_l1_loss",
                       weight=50.,
                       delta=.1,
                       reduction="none")
    # h.box_loss=dict(loss="giou_loss",
    #                 weight=10.,
    #                 reduction="none")
    h.weight_decay = 4e-5

    h.bbox_mean = None  # [0., 0., 0., 0.]
    h.bbox_std = None  # [0.1, 0.1, 0.2, 0.2]

    # dataset
    h.num_classes = 90  # NOTE: 90 is COCO's category count; Objects365 itself defines 365 classes
    h.skip_crowd_during_training = True
    h.dataset = "objects365"

    h.batch_size = batch_size
    h.input_size = [image_size, image_size]
    h.train_dataset_dir = "/home/bail/Data/data1/Dataset/Objects365/train"
    h.val_dataset_dir = "/home/bail/Data/data1/Dataset/Objects365/train"  # NOTE: points at the train split in the original source
    h.augmentation = [
        dict(ssd_crop=dict(patch_area_range=(0.3, 1.),
                           aspect_ratio_range=(0.5, 2.0),
                           min_overlaps=(0.1, 0.3, 0.5, 0.7, 0.9),
                           max_attempts=100,
                           probability=.5)),
        # dict(data_anchor_sampling=dict(anchor_scales=(16, 32, 64, 128, 256, 512),
        #                                overlap_threshold=0.7,
        #                                max_attempts=50,
        #                                probability=.5)),
        dict(flip_left_to_right=dict(probability=0.5)),
        dict(random_distort_color=dict(probability=1.))
    ]

    # train
    h.pretrained_weights_path = "/home/bail/Workspace/pretrained_weights/efficientdet-d%d" % phi

    h.optimizer = dict(optimizer="sgd", momentum=0.9)
    h.lookahead = None

    h.train_steps = 240000
    h.learning_rate_scheduler = dict(scheduler="cosine", initial_learning_rate=0.002)
    h.warmup = dict(warmup_learning_rate=0.00001, steps=24000)
    h.checkpoint_dir = "checkpoints/efficientdet_d%d" % phi
    h.summary_dir = "logs/efficientdet_d%d" % phi

    h.gradient_clip_norm = .0

    h.log_every_n_steps = 500
    h.save_ckpt_steps = 10000
    h.val_every_n_steps = 4000

    h.postprocess = dict(pre_nms_size=5000,   # keep the top-k highest-scoring detections before NMS
                         post_nms_size=100,
                         iou_threshold=0.5,
                         score_threshold=0.2)
    
    return h
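As with the first example, a brief usage sketch (the hypothetical Config stand-in shown earlier applies here as well):

# Hypothetical usage: build the EfficientDet-D0 configuration.
cfg = default_detection_configs(phi=0)
print(cfg.detector)               # "efficientdet-d0"
print(cfg.input_size)             # [512, 512]
print(cfg.anchor["num_anchors"])  # 9 (3 aspect ratios x 3 octave scales)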