def _darknet(num_layers=53, input_channels=3, pretrained=True): model = DarkNet(num_layers, input_channels) if pretrained: assert num_layers in pretrain_infos.keys(), \ "DarkNet{} do not have pretrained weights now, " \ "pretrained should be set as False".format(num_layers) weight_path = get_weights_path(*(pretrain_infos[num_layers])) assert weight_path.endswith('.pdparams'), \ "suffix of weight must be .pdparams" model.load(weight_path[:-9]) return model
def _tsm_resnet(num_layers, seg_num=8, num_classes=400, pretrained=True): model = TSM_ResNet(num_layers, seg_num, num_classes) if pretrained: assert num_layers in pretrain_infos.keys(), \ "TSM-ResNet{} do not have pretrained weights now, " \ "pretrained should be set as False".format(num_layers) weight_path = get_weights_path(*(pretrain_infos[num_layers])) assert weight_path.endswith('.pdparams'), \ "suffix of weight must be .pdparams" model.load(weight_path) return model
def _resnet(arch, Block, depth, pretrained, **kwargs): model = ResNet(Block, depth, **kwargs) if pretrained: assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format( arch) weight_path = get_weights_path(model_urls[arch][0], model_urls[arch][1]) assert weight_path.endswith( '.pdparams'), "suffix of weight must be .pdparams" model.load(weight_path[:-9]) return model
def _yolov3_darknet(num_layers=53, num_classes=80, model_mode='train', pretrained=True): model = YOLOv3(num_classes, model_mode) if pretrained: assert num_layers in pretrain_infos.keys(), \ "YOLOv3-DarkNet{} do not have pretrained weights now, " \ "pretrained should be set as False".format(num_layers) weight_path = get_weights_path(*(pretrain_infos[num_layers])) assert weight_path.endswith('.pdparams'), \ "suffix of weight must be .pdparams" model.load(weight_path) return model
def bmn(cfg, pretrained=True): """BMN model Args: cfg (AttrDict): configs for BMN model pretrained (bool): If True, returns a model with pre-trained model on COCO, default True """ model = BMN(cfg) if pretrained: weight_path = get_weights_path(*(pretrain_infos['bmn'])) assert weight_path.endswith('.pdparams'), \ "suffix of weight must be .pdparams" model.load(weight_path[:-9]) return model
def _vgg(arch, cfg, batch_norm, pretrained, **kwargs): model = VGG(make_layers( cfgs[cfg], batch_norm=batch_norm), num_classes=1000, **kwargs) if pretrained: assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format( arch) weight_path = get_weights_path(model_urls[arch][0], model_urls[arch][1]) assert weight_path.endswith( '.pdparams'), "suffix of weight must be .pdparams" model.load(weight_path[:-9]) return model
def bmn(tscale, dscale, prop_boundary_ratio, num_sample, num_sample_perbin, pretrained=True): """BMN model Args: tscale (int): sequence length, default 100. dscale (int): max duration length, default 100. prop_boundary_ratio (float): ratio of expanded temporal region in proposal boundary, default 0.5. num_sample (int): number of samples betweent starting boundary and ending boundary of each propoasl, default 32. num_sample_perbin (int): number of selected points in each sample, default 3. pretrained (bool): If True, returns a model with pre-trained model, default True. """ model = BMN(tscale, dscale, prop_boundary_ratio, num_sample, num_sample_perbin) if pretrained: weight_path = get_weights_path(*(pretrain_infos['bmn'])) assert weight_path.endswith('.pdparams'), \ "suffix of weight must be .pdparams" model.load(weight_path) return model
def main(): device = set_device(FLAGS.device) fluid.enable_dygraph(device) if FLAGS.dynamic else None inputs = [ Input([None, 1], 'int64', name='img_id'), Input([None, 2], 'int32', name='img_shape'), Input([None, 3, None, None], 'float32', name='image') ] labels = [ Input([None, NUM_MAX_BOXES, 4], 'float32', name='gt_bbox'), Input([None, NUM_MAX_BOXES], 'int32', name='gt_label'), Input([None, NUM_MAX_BOXES], 'float32', name='gt_score') ] if not FLAGS.eval_only: # training mode train_transform = Compose([ ColorDistort(), RandomExpand(), RandomCrop(), RandomFlip(), NormalizeBox(), PadBox(), BboxXYXY2XYWH() ]) train_collate_fn = BatchCompose([RandomShape(), NormalizeImage()]) dataset = COCODataset(dataset_dir=FLAGS.data, anno_path='annotations/instances_train2017.json', image_dir='train2017', with_background=False, mixup=True, transform=train_transform) batch_sampler = DistributedBatchSampler(dataset, batch_size=FLAGS.batch_size, shuffle=True, drop_last=True) loader = DataLoader(dataset, batch_sampler=batch_sampler, places=device, num_workers=FLAGS.num_workers, return_list=True, collate_fn=train_collate_fn) else: # evaluation mode eval_transform = Compose([ ResizeImage(target_size=608), NormalizeBox(), PadBox(), BboxXYXY2XYWH() ]) eval_collate_fn = BatchCompose([NormalizeImage()]) dataset = COCODataset(dataset_dir=FLAGS.data, anno_path='annotations/instances_val2017.json', image_dir='val2017', with_background=False, transform=eval_transform) # batch_size can only be 1 in evaluation for YOLOv3 # prediction bbox is a LoDTensor batch_sampler = DistributedBatchSampler(dataset, batch_size=1, shuffle=False, drop_last=False) loader = DataLoader(dataset, batch_sampler=batch_sampler, places=device, num_workers=FLAGS.num_workers, return_list=True, collate_fn=eval_collate_fn) pretrained = FLAGS.eval_only and FLAGS.weights is None model = yolov3_darknet53(num_classes=dataset.num_classes, model_mode='eval' if FLAGS.eval_only else 'train', pretrained=pretrained) if FLAGS.pretrain_weights and not FLAGS.eval_only: pretrain_weights = FLAGS.pretrain_weights if is_url(pretrain_weights): pretrain_weights = get_weights_path(pretrain_weights) model.load(pretrain_weights, skip_mismatch=True, reset_optimizer=True) optim = make_optimizer(len(batch_sampler), parameter_list=model.parameters()) model.prepare(optim, YoloLoss(num_classes=dataset.num_classes), inputs=inputs, labels=labels, device=FLAGS.device) # NOTE: we implement COCO metric of YOLOv3 model here, separately # from 'prepare' and 'fit' framework for follwing reason: # 1. YOLOv3 network structure is different between 'train' and # 'eval' mode, in 'eval' mode, output prediction bbox is not the # feature map used for YoloLoss calculating # 2. COCO metric behavior is also different from defined Metric # for COCO metric should not perform accumulate in each iteration # but only accumulate at the end of an epoch if FLAGS.eval_only: if FLAGS.weights is not None: model.load(FLAGS.weights, reset_optimizer=True) preds = model.predict(loader, stack_outputs=False) _, _, _, img_ids, bboxes = preds anno_path = os.path.join(FLAGS.data, 'annotations/instances_val2017.json') coco_metric = COCOMetric(anno_path=anno_path, with_background=False) for img_id, bbox in zip(img_ids, bboxes): coco_metric.update(img_id, bbox) coco_metric.accumulate() coco_metric.reset() return if FLAGS.resume is not None: model.load(FLAGS.resume) model.fit(train_data=loader, epochs=FLAGS.epoch - FLAGS.no_mixup_epoch, save_dir="yolo_checkpoint/mixup", save_freq=10) # do not use image mixup transfrom in laste FLAGS.no_mixup_epoch epoches dataset.mixup = False model.fit(train_data=loader, epochs=FLAGS.no_mixup_epoch, save_dir="yolo_checkpoint/no_mixup", save_freq=5)