def detect_face(net, img, device, scale=1., conf_thresh=0.3):
    """Run the detector on one image and return the confident detections.

    Args:
        net: detection network; ``net(x)`` must return ``(loc, conf, iou)``.
        img: image as a NumPy array. It is permuted from axes (0, 1, 2) to
            (2, 0, 1), so a channels-last layout is assumed.
        device: ``torch.device``; the input is moved to it only for CUDA.
        scale: resize factor applied before inference. Box coordinates are
            divided by it afterwards.
        conf_thresh: detections whose score is not above this are dropped.

    Returns:
        ``float32`` array with one row per kept detection: four box
        coordinates followed by the score.
    """
    # Build the network input.
    if scale != 1:
        img = cv2.resize(img, None, None, fx=scale, fy=scale,
                         interpolation=cv2.INTER_LINEAR)
    x = torch.from_numpy(img).permute(2, 0, 1).unsqueeze(0)
    height, width = x.shape[2], x.shape[3]
    if device.type == 'cuda':
        x = x.to(device)

    # Forward pass.
    loc, conf, iou = net(x)

    # Decode boxes against priors generated for this input size.
    bbox_scale = torch.Tensor([width, height, width, height])
    priors = PriorBox(cfg, image_size=(height, width)).forward()
    decoded = decode(loc.squeeze(0).data.cpu(), priors.data, cfg['variance'])
    decoded = decoded[:, :4]  # keep the box only, drop the landmarks
    boxes = (decoded * bbox_scale / scale).cpu().numpy()

    # Combine classification and IoU scores.
    cls_scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    iou_scores = iou.squeeze(0).data.cpu().numpy()[:, 0]
    # Clamp to [0, 1] in place, done here for ONNX compatibility.
    np.clip(iou_scores, 0., 1., out=iou_scores)
    scores = np.sqrt(cls_scores * iou_scores)

    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)

    # Drop low-scoring rows.
    confident = dets[:, -1] > conf_thresh
    return dets[confident]
def forward(self, x):
    """Predict per-anchor class scores and box regressions.

    Five feature maps are produced (backbone output, three ``feature_k``
    stages, and a final 2x2 max-pool). Each is fed to its own
    ``predict_bbox_k`` / ``predict_class_k`` head.

    Args:
        x: input batch passed to ``self.basenet.extract_features``.

    Returns:
        In training mode ``(confidences, locations)`` with shapes
        ``(batch, total_anchors, 3)`` and ``(batch, total_anchors, 4)``.
        In eval mode ``(confidences, boxes)`` where the confidences are
        softmaxed over the last axis and ``boxes`` is the output of
        ``convert_locations_to_boxes``.
    """
    feature_1 = self.basenet.extract_features(x)
    feature_2 = self.feature_2(feature_1)
    feature_3 = self.feature_3(feature_2)
    feature_4 = self.feature_4(feature_3)
    feature_5 = F.max_pool2d(feature_4, kernel_size=2)
    feature_maps = (feature_1, feature_2, feature_3, feature_4, feature_5)

    # Each head emits (batch, anchors_per_cell * 4, H, W). Moving channels
    # last keeps the 4 values of one anchor contiguous before flattening.
    locations = [
        getattr(self, "predict_bbox_{}".format(level))(fmap)
        .permute(0, 2, 3, 1).contiguous()
        for level, fmap in enumerate(feature_maps, start=1)
    ]
    # (batch, total_anchors * 4)
    locations = torch.cat([o.view(o.size(0), -1) for o in locations], 1)
    # (batch, total_anchors, 4)
    locations = locations.view(locations.size(0), -1, 4)

    confidences = [
        getattr(self, "predict_class_{}".format(level))(fmap)
        .permute(0, 2, 3, 1).contiguous()
        for level, fmap in enumerate(feature_maps, start=1)
    ]
    # (batch, total_anchors * 3)
    confidences = torch.cat([o.view(o.size(0), -1) for o in confidences], 1)
    # (batch, total_anchors, 3)
    confidences = confidences.view(confidences.size(0), -1, 3)

    if self.training:
        return (confidences, locations)

    if self.priors is None:
        self.priors = PriorBox()()
    # Follow the predictions' device instead of assuming CUDA, so that
    # CPU-only inference works. This is a no-op once the priors are there.
    self.priors = self.priors.to(locations.device)
    # 0.1 / 0.2 are presumably the center and size variances -- confirm
    # against convert_locations_to_boxes.
    boxes = convert_locations_to_boxes(locations, self.priors, 0.1, 0.2)
    confidences = F.softmax(confidences, dim=2)
    return confidences, boxes
def __init__(self, pretrained=None):
    """Build the ScratchDet backbone, prediction branches and priors.

    Args:
        pretrained: value forwarded to ``self.init_weight``. It is also kept
            on the instance as ``pretraind_weight`` (the attribute name is
            spelled as in the original and may be read elsewhere).
    """
    super(ScratchDet, self).__init__()
    self.pretraind_weight = pretrained
    self.root_res = SSDRES512(input_size=(512, 512), depth=101)

    loc_branch, conf_branch = pred_brach()
    self.loc_layers = loc_branch
    self.conf_layers = conf_branch

    prior_data = PriorBox().forward()
    self.priors = torch.Tensor(prior_data)

    self.init_weight(pretrained=pretrained)
def main(args):
    """Evaluate a trained face detector on a WIDER Face split.

    Loads the model, runs detection on every image (optionally at several
    extra scales), merges the detections with NMS, saves them, and finally
    runs the WIDER Face evaluation.

    Args:
        args: parsed options. The fields read here are ``device``,
            ``trained_model``, ``widerface_root``, ``widerface_split``,
            ``multi_scale``, ``confidence_threshold``, ``nms_threshold``,
            ``top_k``, ``keep_top_k`` and ``res_dir``.
    """
    torch.set_grad_enabled(False)
    device = torch.device(args.device)

    # Initialize the net and load the model.
    print('Loading pretrained model from {}'.format(args.trained_model))
    net = YuFaceDetectNet(phase='test', size=None)
    net = load_model(net, args.trained_model)
    net.eval()
    if device.type == 'cuda':
        cudnn.benchmark = True
        net = net.to(device)
    print('Finished loading model!')

    # Init data loader for WIDER Face.
    print('Loading data for {}...'.format(args.widerface_split))
    widerface = WIDERFace(args.widerface_root, split=args.widerface_split)
    print('Finished loading data!')

    # Scale 1 is always evaluated; these are the additional scales.
    scales = []
    if args.multi_scale:
        scales = [0.25, 0.50, 0.75, 1.25, 1.50, 1.75, 2.0]
    print('Performing testing with scales: 1. {}, conf_threshold: {}'.format(
        str(scales), args.confidence_threshold))

    # Priors depend only on the image shape, so cache them per shape.
    priors_dict = {}
    for idx in tqdm(range(len(widerface))):
        img, event, name = widerface[idx]  # img_subpath = '0--Parade/XXX.jpg'

        priors = priors_dict.get(img.shape)
        if priors is None:
            height, width, _ = img.shape
            priors = PriorBox(cfg, image_size=(height, width)).forward()
            priors_dict[img.shape] = priors
        dets = detect_face(net, img, priors, device)

        available_scales = get_available_scales(img.shape[0], img.shape[1],
                                                scales)
        for available_scale in available_scales:
            # Rescaled inputs have a different shape, so no cached priors
            # are passed for them.
            det = detect_face(net, img, None, device, scale=available_scale)
            if det.shape[0] != 0:
                # np.row_stack is a deprecated alias (NumPy 2.0) of vstack.
                dets = np.vstack((dets, det))

        # NMS over the detections from all scales.
        dets = nms_opencv(dets,
                          score_thresh=args.confidence_threshold,
                          nms_thresh=args.nms_threshold,
                          top_k=args.top_k,
                          keep_top_k=args.keep_top_k)
        save_res(dets, event, name)

    # widerface_eval
    print('Evaluating:')
    evaluation(args.res_dir,
               os.path.join(args.widerface_root, 'eval_tools/ground_truth'))
def __init__(self, base, extras, ARM, ODM, TCB, num_classes):
    """Assemble the network from pre-built layer lists.

    Args:
        base: backbone layers, stored as the ``nn.ModuleList`` ``self.vgg``.
        extras: extra layers, stored as ``self.extras``.
        ARM: indexable pair; ``ARM[0]`` becomes ``arm_loc`` and ``ARM[1]``
            becomes ``arm_conf``.
        ODM: indexable pair; ``ODM[0]`` becomes ``odm_loc`` and ``ODM[1]``
            becomes ``odm_conf``.
        TCB: indexable triple; stored as ``tcb0``, ``tcb1`` and ``tcb2``.
        num_classes: number of classes, kept on the instance.
    """
    super(RefineDet, self).__init__()
    self.num_classes = num_classes

    # Priors are fixed by the configured image size, so build them once.
    prior_cfg = cfgs.PriorBox_Cfg[str(cfgs.ImgSize)]
    self.priorbox = PriorBox(prior_cfg)
    with torch.no_grad():
        self.priors = self.priorbox.forward()

    # SSD network.
    self.vgg = nn.ModuleList(base)
    # L2Norm layers applied to conv4_3 / conv5_3 features, per the attribute
    # names. The second argument is presumably the initial scale.
    self.conv4_3_L2Norm = L2Norm(512, 10)
    self.conv5_3_L2Norm = L2Norm(512, 8)
    self.extras = nn.ModuleList(extras)

    arm_loc_layers, arm_conf_layers = ARM[0], ARM[1]
    self.arm_loc = nn.ModuleList(arm_loc_layers)
    self.arm_conf = nn.ModuleList(arm_conf_layers)

    odm_loc_layers, odm_conf_layers = ODM[0], ODM[1]
    self.odm_loc = nn.ModuleList(odm_loc_layers)
    self.odm_conf = nn.ModuleList(odm_conf_layers)

    self.tcb0 = nn.ModuleList(TCB[0])
    self.tcb1 = nn.ModuleList(TCB[1])
    self.tcb2 = nn.ModuleList(TCB[2])
def __init__(self, fpn_filter_list, scale_list, num_classes):
    """Build the ResNet-101 + FPN variant of the network.

    Args:
        fpn_filter_list: indexable with at least four entries. It is passed
            whole to ``FPN`` and entry ``i`` is the first argument of the
            i-th ARM head.
        scale_list: indexable with at least four entries. Entry ``i`` is the
            second argument of the i-th ARM and ODM heads.
        num_classes: output size of the ODM heads (ARM heads use 2).
    """
    super(RefineDet, self).__init__()
    self.num_classes = num_classes

    prior_cfg = cfgs.PriorBox_Cfg_resnet[str(cfgs.ImgSize)]
    self.priorbox = PriorBox(prior_cfg)
    with torch.no_grad():
        self.priors = self.priorbox.forward()

    # Backbone plus one extra residual stage on top of it.
    inplanes, planes = 2048, 512
    self.backone = resnet101(pretrained=True)
    self.res6 = resnet_layer5(inplanes, planes, 3, 2)
    self.FPN = FPN(fpn_filter_list)

    # Heads are created in the order ARM P3..P6, then ODM P3..P6.
    arm_heads = [
        RPN_Pred(fpn_filter_list[level], scale_list[level], 2)
        for level in range(4)
    ]
    odm_heads = [
        RPN_Pred(256, scale_list[level], num_classes)
        for level in range(4)
    ]
    self.Arm_list = nn.ModuleList(arm_heads)
    self.Odm_list = nn.ModuleList(odm_heads)
def get_prediction(img, scale, im_height, im_width, print_messages=False,
                   conf_thresh=0.3, top_k=5000):
    """Run the detector on one preprocessed image and return candidates.

    Uses the module-level ``net``, ``device`` and ``cfg``.

    Args:
        img: network input, passed to ``net`` unchanged.
        scale: multiplier applied to the decoded boxes (a tensor that
            broadcasts against them).
        im_height: image height used to generate the priors.
        im_width: image width used to generate the priors.
        print_messages: print the number of surviving candidates.
        conf_thresh: candidates whose score is not above this are dropped.
            The default matches the value that used to be hard-coded.
        top_k: maximum number of candidates kept, highest score first. The
            default matches the value that used to be hard-coded.

    Returns:
        ``(boxes, scores)`` NumPy arrays sorted by descending score. No NMS
        is applied here.
    """
    loc, conf = net(img)  # forward pass

    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward().to(device)
    boxes = decode(loc.data.squeeze(0), priors.data, cfg['variance'])
    boxes = (boxes * scale).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # Ignore low scores.
    inds = np.where(scores > conf_thresh)[0]
    boxes = boxes[inds]
    scores = scores[inds]

    # Keep top-K before NMS.
    order = scores.argsort()[::-1][:top_k]
    boxes = boxes[order]
    scores = scores[order]

    if print_messages:
        print('there are', len(boxes), 'candidates')
    return boxes, scores
def __init__(self, mode, backbone, size, num_classes, with_fpn=True):
    """Build the SSD layers for the requested backbone.

    Args:
        mode: ``"test"`` or ``"train"``.
        backbone: ``'mobilenetv3_large'`` or ``'mobilenetv3_small'``.
        size: image size, forwarded to the layer builders.
        num_classes: number of classes. 21 selects ``voc_config``, any other
            value selects ``coco_config``.
        with_fpn: also build the FPN layers when true.
    """
    super(SSD, self).__init__()
    assert mode in ["test", "train"]
    assert backbone in ['mobilenetv3_large', 'mobilenetv3_small']

    self.mode = mode
    self.num_classes = num_classes
    self.cfg = voc_config if num_classes == 21 else coco_config
    self.priorbox = PriorBox(self.cfg)
    self.priors = self.priorbox.forward()
    self.size = size
    self.with_fpn = with_fpn

    # SSD network.
    if self.with_fpn:
        built = self.build_ssd_with_fpn(backbone, self.size,
                                        self.num_classes)
        (self.basenet, self.topnet, self.conv_layers, self.fpn_layers,
         self.loc_layers, self.conf_layers) = built
    else:
        built = self.build_ssd(backbone, self.size, self.num_classes)
        (self.basenet, self.topnet,
         self.loc_layers, self.conf_layers) = built

    if mode == 'test':
        self.softmax = nn.Softmax(dim=-1)
        self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
def forward(self, x, targets=None):
    """Compute class scores and box predictions for a batch.

    Args:
        x: input batch.
        targets: not used by this method; kept so existing callers that pass
            it keep working.

    Returns:
        In training mode ``(confidences, locations)`` with shapes
        ``(batch, num_priors, num_classes)`` and ``(batch, num_priors, 4)``.
        In eval mode ``(confidences, boxes)``: confidences are softmaxed over
        the class axis, and ``boxes`` are the locations decoded against the
        priors and converted by ``center_form_to_corner_form``.
    """
    sources = []

    # First 23 backbone layers, then L2-normalize that feature map.
    for i in range(23):
        x = self.vgg[i](x)
    sources.append(self.l2_norm(x))

    # Remaining backbone layers.
    for i in range(23, len(self.vgg)):
        x = self.vgg[i](x)
    sources.append(x)

    # Every second extra layer contributes a source feature map.
    for k, layer in enumerate(self.extras):
        x = F.relu(layer(x), inplace=True)
        if k % 2 == 1:
            sources.append(x)

    locations = []
    confidences = []
    for feature, reg_head, cls_head in zip(sources,
                                           self.regression_headers,
                                           self.classification_headers):
        locations.append(reg_head(feature).permute(0, 2, 3, 1).contiguous())
        confidences.append(cls_head(feature).permute(0, 2, 3, 1).contiguous())

    confidences = torch.cat([o.view(o.size(0), -1) for o in confidences], 1)
    locations = torch.cat([o.view(o.size(0), -1) for o in locations], 1)
    confidences = confidences.view(confidences.size(0), -1, self.num_classes)
    locations = locations.view(locations.size(0), -1, 4)

    if self.training:
        return (confidences, locations)

    # When evaluating, decode predictions. Priors are created lazily on the
    # predictions' device. No debug output is printed here: this runs on
    # every inference call.
    if self.priors is None:
        self.priors = PriorBox()().to(locations.device)
    confidences = F.softmax(confidences, dim=2)
    boxes = box_utils.convert_locations_to_boxes(
        locations, self.priors, 0.1, 0.2)
    boxes = box_utils.center_form_to_corner_form(boxes)
    return confidences, boxes
def __init__(self, args):
    """Set up the detector: device defaults, model, decoder and priors.

    Args:
        args: options object. The fields read here are ``ctx``,
            ``modelpath``, ``threshold`` and ``img_dir``.
    """
    # CUDA is used only when requested and actually available.
    self.use_cuda = bool(args.ctx and torch.cuda.is_available())
    default_tensor = ('torch.cuda.FloatTensor'
                      if self.use_cuda else 'torch.FloatTensor')
    # Process-wide setting: new tensors default to this type.
    torch.set_default_tensor_type(default_tensor)

    self.loadmodel(args.modelpath)
    self.threshold = args.threshold
    self.img_dir = args.img_dir
    self.detect = Detect(cfg)

    self.Prior = PriorBox()
    with torch.no_grad():
        self.priors = self.Prior()
    self.num_classes = cfg.NUM_CLASSES
def __init__(self,
             img_path="./dataset",
             transform=None,
             center_variance=0.1,
             size_variance=0.2):
    """Index the images and labels and precompute the priors.

    Args:
        img_path: dataset root; images are globbed from ``<root>/images/*.jpg``.
        transform: stored as ``self.transform``.
        center_variance: stored as ``self.center_variance``.
        size_variance: stored as ``self.size_variance``.
    """
    self.center_variance = center_variance
    self.size_variance = size_variance

    self.img_paths = glob.glob(img_path + "/images/*.jpg")
    # The label path is derived textually from the image path.
    # NOTE(review): str.replace rewrites every "images" / ".jpg" occurrence
    # in the path, not only the final directory and extension.
    label_paths = []
    for image_file in self.img_paths:
        xml_file = image_file.replace(".jpg", ".xml")
        label_paths.append(xml_file.replace("images", "labels"))
    self.labels = label_paths

    self.class_names = ("__background__", "basketball", "volleyball")

    # Priors in center form and the same priors in corner form.
    prior = PriorBox()
    self.center_form_priors = prior()
    self.imgW = 512
    self.imgH = 512
    self.corner_form_priors = center_form_to_corner_form(
        self.center_form_priors)

    self.transform = transform
def __init__(self, config):
    """Create the 1-D convolutional base, anchor and prediction layers.

    Args:
        config: object providing ``num_classes``, ``num_anchors`` and
            ``feature_dim``. It is also passed to ``PriorBox``.
    """
    super(SSAD, self).__init__()
    self.num_classes = config.num_classes
    self.num_anchors = config.num_anchors
    self.input_feature_dim = config.feature_dim
    # Each anchor predicts num_classes scores plus 3 further values.
    self.prediction_output = self.num_anchors * (self.num_classes + 3)
    self.best_loss = 10000000
    self.prior_box = PriorBox(config)

    # Base layers: two conv / ReLU / max-pool stages.
    base_conv = dict(kernel_size=9, stride=1, padding=4)
    base_pool = dict(kernel_size=4, stride=2, padding=1)
    self.base_layers = nn.Sequential(OrderedDict([
        ('conv1d_1', nn.Conv1d(in_channels=self.input_feature_dim,
                               out_channels=512, **base_conv)),
        ('relu_1', nn.ReLU()),
        ('maxpooling1d_1', nn.MaxPool1d(**base_pool)),
        ('conv1d_2', nn.Conv1d(in_channels=512, out_channels=512,
                               **base_conv)),
        ('relu_2', nn.ReLU()),
        ('maxpooling1d_2', nn.MaxPool1d(**base_pool)),
    ]))

    # Anchor layers: three stride-2 convolutions, created in order 1..3.
    for index, in_channels in enumerate((512, 1024, 1024), start=1):
        anchor_layer = nn.Sequential(
            nn.Conv1d(in_channels=in_channels, out_channels=1024,
                      kernel_size=3, stride=2, padding=1),
            nn.ReLU())
        setattr(self, 'anchor_layer{}'.format(index), anchor_layer)

    # Prediction layers: one per anchor layer, created in order 1..3.
    for index in (1, 2, 3):
        prediction_layer = nn.Conv1d(in_channels=1024,
                                     out_channels=self.prediction_output,
                                     kernel_size=3, stride=1, padding=1)
        setattr(self, 'prediction_layer{}'.format(index), prediction_layer)

    self.reset_params()
def create_model():
    """Build the RFB detector on a ResNet-50 base and its prior-box generator.

    All hyper-parameters are fixed inside this function.

    Returns:
        ``(model, priorbox)``: the network and the ``PriorBox`` instance
        configured from the feature-map sizes the network produces.
    """
    ASPECT_RATIOS = [[2, 3], [2, 3], [2, 3], [2, 3], [2], [2]]
    IMAGE_SIZE = [300, 300]
    FEATURE_LAYER = [[22, 34, 'S', 'S', '', ''],
                     [512, 1024, 512, 256, 256, 256]]
    NUM_CLASSES = 21
    SIZES = [0.2, 0.95]
    STEPS = []
    CLIP = True

    base = networks_map['resnet_50']

    # Boxes per location: integer ratio entries count twice, other entries
    # count once.
    number_box = []
    for ratios in ASPECT_RATIOS:
        if isinstance(ratios[0], int):
            number_box.append(2 * len(ratios))
        else:
            number_box.append(len(ratios))

    model = ssds_map['rfb'](base=base,
                            feature_layer=FEATURE_LAYER,
                            mbox=number_box,
                            num_classes=NUM_CLASSES)

    feature_maps = _forward_features_size(model, IMAGE_SIZE)
    print('==>Feature map size:')
    print(feature_maps)

    priorbox = PriorBox(image_size=IMAGE_SIZE,
                        feature_maps=feature_maps,
                        aspect_ratios=ASPECT_RATIOS,
                        scale=SIZES,
                        archor_stride=STEPS,
                        clip=CLIP)
    return model, priorbox
# net = MobileNetV1(num_classes=num_classes)
net = RetinaFace(cfg=cfg).to(device)
# Optimize only the parameters that require gradients.
params = [p for p in net.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=LR, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=10,
                                               gamma=0.1)
# criterion = nn.CrossEntropyLoss().to(device)
criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)

# Priors are fixed for the training resolution, so compute them once.
priorbox = PriorBox(cfg, image_size=(IMG_DIM, IMG_DIM))
with torch.no_grad():
    priors = priorbox.forward()
    priors = priors.to(device)

for epoch in range(MAX_EPOCH):
    # Read the learning rate actually in effect from the optimizer.
    # Calling lr_scheduler.get_lr() outside scheduler.step() warns and, for
    # StepLR, applies gamma a second time on step boundaries.
    lr = optimizer.param_groups[-1]['lr']
    t1 = time.time()
    time_dict = dict()
    for i, data in enumerate(train_loader):
        t2 = time.time()
        time_dict['iterdat'] = t2 - t1
        print(time_dict['iterdat'])
        # NOTE(review): this `continue` skips the rest of the iteration, so
        # the loop currently only measures data-loading time.
        continue
        images, targets = data  # B C H W
class RefineDet(nn.Module):
    """Two-stage single-shot detector on a ResNet-101 backbone with an FPN.

    Four feature maps (three from the backbone, one from the extra ``res6``
    stage) are fed to the ARM heads, which emit a box regression and a
    2-way score per prior. The same four maps go through the FPN, and the
    ODM heads emit a box regression and ``num_classes`` scores per prior
    from the FPN outputs.

    Args:
        fpn_filter_list: indexable with at least four entries. It is passed
            whole to ``FPN`` and entry ``i`` is the first argument of the
            i-th ARM head.
        scale_list: indexable with at least four entries. Entry ``i`` is the
            second argument of the i-th ARM and ODM heads.
        num_classes: number of ODM output classes.
    """

    def __init__(self, fpn_filter_list, scale_list, num_classes):
        super(RefineDet, self).__init__()
        self.num_classes = num_classes
        self.priorbox = PriorBox(cfgs.PriorBox_Cfg_resnet[str(cfgs.ImgSize)])
        with torch.no_grad():
            self.priors = self.priorbox.forward()

        # Backbone plus one extra residual stage on top of it.
        inplanes = 2048
        planes = 512
        self.backone = resnet101(pretrained=True)
        self.res6 = resnet_layer5(inplanes, planes, 3, 2)
        self.FPN = FPN(fpn_filter_list)

        Arm_P3 = RPN_Pred(fpn_filter_list[0], scale_list[0], 2)
        Arm_P4 = RPN_Pred(fpn_filter_list[1], scale_list[1], 2)
        Arm_P5 = RPN_Pred(fpn_filter_list[2], scale_list[2], 2)
        Arm_P6 = RPN_Pred(fpn_filter_list[3], scale_list[3], 2)
        Odm_P3 = RPN_Pred(256, scale_list[0], num_classes)
        Odm_P4 = RPN_Pred(256, scale_list[1], num_classes)
        Odm_P5 = RPN_Pred(256, scale_list[2], num_classes)
        Odm_P6 = RPN_Pred(256, scale_list[3], num_classes)
        self.Arm_list = nn.ModuleList([Arm_P3, Arm_P4, Arm_P5, Arm_P6])
        self.Odm_list = nn.ModuleList([Odm_P3, Odm_P4, Odm_P5, Odm_P6])

    def forward(self, x):
        """Apply the network to a batch of images.

        Args:
            x: input image batch.

        Returns:
            A 6-tuple:
                0: ARM localization, shape ``(batch, num_priors, 4)``
                1: ARM confidence, shape ``(batch, num_priors, 2)``
                2: ODM localization, shape ``(batch, num_priors, 4)``
                3: ODM confidence, shape ``(batch, num_priors, num_classes)``
                4: ``self.priors``
                5: list of the per-level ODM confidence maps, channels last,
                   before flattening
        """
        c3, c4, c5, x = self.backone(x)
        c6 = self.res6(x)
        sources = [c3, c4, c5, c6]

        # Apply ARM to the source layers. Each head is evaluated once and
        # both of its outputs are taken from that single call.
        arm_loc = []
        arm_conf = []
        for source, arm_pred in zip(sources, self.Arm_list):
            prediction = arm_pred(source)
            arm_loc.append(prediction[0].permute(0, 2, 3, 1).contiguous())
            arm_conf.append(prediction[1].permute(0, 2, 3, 1).contiguous())
        arm_loc = torch.cat([tmp.view(tmp.size(0), -1) for tmp in arm_loc], 1)
        arm_conf = torch.cat([tmp.view(tmp.size(0), -1) for tmp in arm_conf],
                             1)

        # Top-down feature pyramid over the same four maps.
        p3, p4, p5, p6 = self.FPN(c3, c4, c5, c6)
        tcb_source = [p3, p4, p5, p6]

        # Apply ODM to the pyramid outputs, again one call per head.
        odm_loc = []
        odm_conf = []
        for source, odm_pred in zip(tcb_source, self.Odm_list):
            prediction = odm_pred(source)
            odm_loc.append(prediction[0].permute(0, 2, 3, 1).contiguous())
            odm_conf.append(prediction[1].permute(0, 2, 3, 1).contiguous())
        odm_conf_map = odm_conf
        odm_loc = torch.cat([tmp.view(tmp.size(0), -1) for tmp in odm_loc], 1)
        odm_conf = torch.cat([tmp.view(tmp.size(0), -1) for tmp in odm_conf],
                             1)

        output = (arm_loc.view(arm_loc.size(0), -1, 4),
                  arm_conf.view(arm_conf.size(0), -1, 2),
                  odm_loc.view(odm_loc.size(0), -1, 4),
                  odm_conf.view(odm_conf.size(0), -1, self.num_classes),
                  self.priors,
                  odm_conf_map)
        return output

    def load_weights(self, base_file):
        """Load a ``.pth`` / ``.pkl`` checkpoint into the model.

        Loading is non-strict, so missing and unexpected keys are ignored.
        Files with any other extension are rejected with a message and
        nothing is loaded.

        Args:
            base_file: path to the checkpoint file.
        """
        _, ext = os.path.splitext(base_file)
        # `ext == '.pkl' or '.pth'` was always true; test membership instead.
        if ext.lower() in ('.pkl', '.pth'):
            print('Loading weights into state dict...')
            # Map storages to CPU so GPU-saved checkpoints also load on
            # CPU-only machines; load_state_dict copies the values into the
            # existing parameters wherever they live.
            state = torch.load(base_file,
                               map_location=lambda storage, loc: storage)
            self.load_state_dict(state, strict=False)
            print('Finished!')
        else:
            print('Sorry only .pth and .pkl files supported.')
img = torch.from_numpy(img).unsqueeze(0).to(device)
# 14 multipliers alternating width / height, applied element-wise to the
# decoded output below.
scale = torch.Tensor([im_width, im_height] * 7).to(device)

_t['forward_pass'].tic()
loc, conf = net(img)  # forward pass
_t['forward_pass'].toc()

_t['misc'].tic()
priorbox = PriorBox(cfg, image_size=(im_height, im_width))
priors = priorbox.forward().to(device)
prior_data = priors.data
boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
boxes = (boxes * scale).cpu().numpy()
scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

# ignore low scores
inds = np.where(scores > args.confidence_threshold)[0]
boxes, scores = boxes[inds], scores[inds]

# keep top-K before NMS
order = scores.argsort()[::-1][:args.top_k]
class SSD(nn.Module):
    """SSD detector on a MobileNetV3 backbone, optionally with an FPN.

    Args:
        mode: ``"train"`` or ``"test"``.
        backbone: backbone for the base network, ``'mobilenetv3_large'`` or
            ``'mobilenetv3_small'``.
        size: image size; its string form indexes the ``extras`` and ``mbox``
            entries of the configuration.
        num_classes: number of object classes. 21 selects ``voc_config``,
            any other value selects ``coco_config``.
        with_fpn: build and apply the top-down pyramid layers when true.
    """

    def __init__(self, mode, backbone, size, num_classes, with_fpn=True):
        super(SSD, self).__init__()
        assert mode in ["test", "train"]
        assert backbone in ['mobilenetv3_large', 'mobilenetv3_small']
        self.mode = mode
        self.num_classes = num_classes
        self.cfg = voc_config if num_classes == 21 else coco_config
        self.priorbox = PriorBox(self.cfg)
        self.priors = self.priorbox.forward()
        self.size = size
        self.with_fpn = with_fpn

        # SSD network.
        if self.with_fpn:
            self.basenet, self.topnet, self.conv_layers, self.fpn_layers, self.loc_layers, self.conf_layers =\
                self.build_ssd_with_fpn(backbone, self.size, self.num_classes)
        else:
            self.basenet, self.topnet, self.loc_layers, self.conf_layers =\
                self.build_ssd(backbone, self.size, self.num_classes)

        if mode == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def forward(self, x):
        """Apply the network to a batch of images.

        Args:
            x: input image batch.

        Returns:
            In ``test`` mode, whatever ``self.detect`` returns for the box
            regressions, the softmaxed class scores and the priors.
            In ``train`` mode, a tuple of
                1: localization output, shape ``(batch, num_priors, 4)``
                2: class scores, shape ``(batch, num_priors, num_classes)``
                3: ``self.priors``
        """
        feature_inputs = []
        loc_reg_output = []   # box regression of each prediction layer
        classify_output = []  # class confidence of each prediction layer

        # Backbone: tap the configured layers after their first conv
        # sub-module, then run the whole layer.
        for num, layer in enumerate(self.basenet):
            if num in self.cfg['net_source']:
                feature_inputs.append(layer.conv._modules['0'](x))
            x = layer(x)

        # Extra layers: tap after the second of the three sub-modules.
        for num, layer in enumerate(self.topnet):
            x = layer._modules['0'](x)
            x = layer._modules['1'](x)
            feature_inputs.append(x)
            x = layer._modules['2'](x)

        # FPN: walk from the last feature map to the first, adding the
        # upsampled result of the previous step.
        if self.with_fpn:
            last = len(feature_inputs) - 1
            for idx in range(last, -1, -1):
                if idx == last:
                    x = self.conv_layers[idx](feature_inputs[idx])
                    p = nn.functional.interpolate(x, scale_factor=2)
                    feature_inputs[idx] = x
                elif idx == 0:
                    x = self.conv_layers[0](feature_inputs[0])
                    x += p
                    feature_inputs[0] = self.fpn_layers[0](x)
                else:
                    x = self.conv_layers[idx](feature_inputs[idx])
                    x += p
                    p = nn.functional.interpolate(x, scale_factor=2)
                    # Only indices 0..3 have a smoothing conv in fpn_layers.
                    if idx <= 3:
                        feature_inputs[idx] = self.fpn_layers[idx](x)
                    else:
                        feature_inputs[idx] = x

        for (x, loc_layer, conf_layer) in zip(feature_inputs, self.loc_layers,
                                              self.conf_layers):
            loc_reg_output.append(
                loc_layer(x).permute(0, 2, 3, 1).contiguous())
            classify_output.append(
                conf_layer(x).permute(0, 2, 3, 1).contiguous())

        loc_reg_output = torch.cat(
            [loc.view(loc.shape[0], -1) for loc in loc_reg_output], dim=1)
        loc_reg_output = loc_reg_output.view(loc_reg_output.shape[0], -1, 4)
        classify_output = torch.cat(
            [conf.view(conf.shape[0], -1) for conf in classify_output], dim=1)
        classify_output = classify_output.view(classify_output.shape[0], -1,
                                               self.num_classes)

        if self.mode == 'test':
            output = self.detect(loc_reg_output,
                                 self.softmax(classify_output),
                                 self.priors)
        else:
            output = (loc_reg_output, classify_output, self.priors)
        return output

    def load_weights(self, base_file):
        """Load a ``.pth`` / ``.pkl`` checkpoint into the model.

        Files with any other extension are rejected with a message and
        nothing is loaded.

        Args:
            base_file: path to the checkpoint file.
        """
        _, ext = os.path.splitext(base_file)
        # `ext == '.pkl' or '.pth'` was always true; test membership instead.
        if ext.lower() in ('.pkl', '.pth'):
            print('Loading weights into state dict...')
            self.load_state_dict(
                torch.load(base_file,
                           map_location=lambda storage, loc: storage))
            print('Finished!')
        else:
            print('Sorry only .pth and .pkl files supported.')

    def _build_backbone_and_extras(self, backbone, size, num_classes):
        """Create the backbone layers and the extra layers stacked on them.

        Returns:
            ``(mobile_layers, extra_layers)`` as plain Python lists.
        """
        # Backbone network.
        if backbone == 'mobilenetv3_small':
            base_model = mobilenetv3_small(num_classes=num_classes,
                                           include_top=False)
        else:
            base_model = mobilenetv3_large(num_classes=num_classes,
                                           include_top=False)
        mobile_layers = list(base_model.get_layers())

        # Extras on top of the backbone: 1x1 conv, stride-2 depthwise 3x3
        # conv, then a 1x1 conv that doubles the channel count.
        extra_layers = []
        in_channels = 96
        for k, v in enumerate(self.cfg['extras'][str(size)]):
            extra_layers.append(
                nn.Sequential(
                    nn.Conv2d(in_channels, v, kernel_size=1, stride=1),
                    nn.Conv2d(v, v, kernel_size=3, stride=2, padding=1,
                              groups=v),
                    nn.Conv2d(v, v * 2, kernel_size=1, stride=1)))
            in_channels = v * 2
        return mobile_layers, extra_layers

    def build_ssd_with_fpn(self, backbone, size, num_classes):
        """Build backbone, extras, FPN and prediction layers.

        Returns:
            Six ``nn.ModuleList`` objects: backbone layers, extra layers,
            lateral 1x1 convs, FPN smoothing convs, localization heads and
            confidence heads.
        """
        conv_layers = []
        fpn_layers = []
        loc_layers = []
        conf_layers = []
        mobile_layers, extra_layers = self._build_backbone_and_extras(
            backbone, size, num_classes)

        pyramid_size = self.cfg['TOP_DOWN_PYRAMID_SIZE']
        mbox = self.cfg['mbox'][str(size)]

        # Lateral conv, smoothing conv and heads for every tapped backbone
        # layer.
        for k, v in enumerate(self.cfg['net_source']):
            conv_layers += [
                nn.Conv2d(mobile_layers[v].conv._modules['0'].out_channels,
                          pyramid_size, kernel_size=1)
            ]
            fpn_layers += [
                nn.Conv2d(pyramid_size, pyramid_size, kernel_size=3,
                          padding=1)
            ]
            loc_layers += [
                nn.Conv2d(pyramid_size, mbox[k] * 4, kernel_size=3,
                          padding=1)
            ]
            conf_layers += [
                nn.Conv2d(pyramid_size, mbox[k] * num_classes, kernel_size=3,
                          padding=1)
            ]

        # Extra layers continue the mbox indexing at 4 and get no smoothing
        # conv.
        for k, v in enumerate(extra_layers, 4):
            conv_layers += [
                nn.Conv2d(v._modules['1'].out_channels, pyramid_size,
                          kernel_size=1)
            ]
            loc_layers += [
                nn.Conv2d(pyramid_size, mbox[k] * 4, kernel_size=3,
                          padding=1)
            ]
            conf_layers += [
                nn.Conv2d(pyramid_size, mbox[k] * num_classes, kernel_size=3,
                          padding=1)
            ]

        return nn.ModuleList(mobile_layers), nn.ModuleList(extra_layers), \
            nn.ModuleList(conv_layers), nn.ModuleList(fpn_layers), \
            nn.ModuleList(loc_layers), nn.ModuleList(conf_layers)

    def build_ssd(self, backbone, size, num_classes):
        """Build backbone, extras and prediction layers without an FPN.

        Returns:
            Four ``nn.ModuleList`` objects: backbone layers, extra layers,
            localization heads and confidence heads.
        """
        loc_layers = []
        conf_layers = []
        mobile_layers, extra_layers = self._build_backbone_and_extras(
            backbone, size, num_classes)

        mbox = self.cfg['mbox'][str(size)]

        # Heads attached directly to the tapped backbone layers.
        for k, v in enumerate(self.cfg['net_source']):
            tap_channels = mobile_layers[v].conv._modules['0'].out_channels
            loc_layers += [
                nn.Conv2d(tap_channels, mbox[k] * 4, kernel_size=3,
                          padding=1)
            ]
            conf_layers += [
                nn.Conv2d(tap_channels, mbox[k] * num_classes, kernel_size=3,
                          padding=1)
            ]

        # Heads attached to the extra layers; mbox indexing continues at 4.
        for k, v in enumerate(extra_layers, 4):
            tap_channels = v._modules['1'].out_channels
            loc_layers += [
                nn.Conv2d(tap_channels, mbox[k] * 4, kernel_size=3,
                          padding=1)
            ]
            conf_layers += [
                nn.Conv2d(tap_channels, mbox[k] * num_classes, kernel_size=3,
                          padding=1)
            ]

        return nn.ModuleList(mobile_layers), nn.ModuleList(extra_layers), \
            nn.ModuleList(loc_layers), nn.ModuleList(conf_layers)

    def to_cuda(self):
        """Move the module and its priors to CUDA and return ``self``.

        ``self.priors`` is assigned as a plain attribute in ``__init__``, so
        it is moved explicitly here.
        """
        self.priors = self.priors.cuda()
        self.cuda()
        return self
# Wrap the network for multi-GPU training when several devices are listed.
if len(gpu_ids) > 1:
    net = torch.nn.DataParallel(net, device_ids=gpu_ids)

# The first listed GPU is the primary device.
device = torch.device('cuda:{}'.format(gpu_ids[0]))
cudnn.benchmark = True
net = net.to(device)

optimizer = optim.SGD(net.parameters(),
                      lr=initial_lr,
                      momentum=momentum,
                      weight_decay=weight_decay)
criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 3, 0.35, False,
                         False)

# Priors are fixed for the training resolution, so compute them once.
priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
with torch.no_grad():
    priors = priorbox.forward()
    priors = priors.to(device)


def train():
    net.train()

    # Load the two datasets: face rectangles and landmarks respectively.
    print('Loading Dataset...')
    dataset_rect = FaceRectLMDataset(training_face_rect_dir, img_dim,
                                     rgb_mean)
    dataset_landmark = FaceRectLMDataset(training_face_landmark_dir, img_dim,
                                         rgb_mean)
mbox_loc = Reshape((num_boxes, 4), name='mbox_loc_final')(mbox_loc) mbox_conf = Reshape((num_boxes, num_classes), name='mbox_conf_logits')(mbox_conf) mbox_conf = Activation('softmax', name='mbox_conf_final')(mbox_conf) predictions = concatenate([mbox_loc, mbox_conf], axis=2, name='predictions') model = Model(inputs=input_layer, outputs=predictions) if weights_path is not None: model.load_weights(weights_path, by_name=True) if frozen_layers is not None: for layer in model.layers: if layer.name in frozen_layers: layer.trainable = False return model if __name__ == "__main__": from prior_box import PriorBox model = SSD300() print(model.output_shape) prior_box = PriorBox() prior_boxes = prior_box.forward() prior_boxes = prior_boxes.numpy() print(prior_boxes.shape) model.summary()
def test_net(save_folder, annopath, net, im_size=300, thresh=0.05):
    """Run detection on the test images, draw the results and collect boxes.

    Args:
        save_folder: not used by this function.
        annopath: CSV file with a ``filename`` column.
        net: detection network on CUDA; ``net(img)`` must return
            ``(loc, conf)``.
        im_size: not used by this function.
        thresh: not used by this function.

    Returns:
        A list with one entry per processed image. Each entry is a list of
        ``[xmin, ymin, xmax, ymax, conf, class_id]``. Images that cannot be
        read are skipped with a message.
    """
    torch.set_grad_enabled(False)
    df = pd.read_csv(annopath)
    filenames = df['filename'].unique()
    all_img_boxes = []
    # NOTE(review): this hard-coded path overrides the list read from the
    # CSV. It looks like a debugging leftover; confirm before removing it.
    filenames = ['/root/face_mask_lmks_detection/test_images/test.jpg']
    resize = 1

    # testing begin
    for i, image_path in enumerate(filenames):
        img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if img_raw is None:
            # cv2.imread returns None instead of raising on a bad path.
            print('Skipping unreadable image: {}'.format(image_path))
            continue
        img = np.float32(img_raw)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])  # w h w h
        img -= (104, 117, 123)  # per-channel mean subtraction

        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.cuda()
        prior_data = priors.data

        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.cuda()
        scale = scale.cuda()

        tic = time.time()
        loc, conf = net(img)  # forward pass
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()

        # Remove the batch dim (single image) and drop the first class
        # column. conf: (batch, num_anchors, 3).
        scores = F.softmax(conf, dim=-1).squeeze(0).data.cpu().numpy()[:, 1:]
        # Best remaining class and its score for each anchor.
        labels = np.argmax(scores, axis=-1)
        scores = np.max(scores, axis=-1)

        keep_idx = single_class_non_max_suppression(boxes, scores, 0.6, 0.5)

        per_img_bboxes = []
        for idx in keep_idx:
            score = float(scores[idx])
            class_id = labels[idx]
            bbox = boxes[idx]
            text = "{:.4f}".format(score)
            # Clip the coordinates to the image boundary.
            xmin = max(0, int(bbox[0]))
            ymin = max(0, int(bbox[1]))
            xmax = min(int(bbox[2]), im_width)
            ymax = min(int(bbox[3]), im_height)
            per_img_bboxes.append([xmin, ymin, xmax, ymax, score, class_id])

            color = (0, 255, 0) if int(class_id) == 1 else (0, 0, 255)
            cv2.rectangle(img_raw, (xmin, ymin), (xmax, ymax), color, 2)
            cv2.putText(img_raw, text, (xmin, ymin + 12),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

        # Write the annotated image once per image.
        cv2.imwrite('./result.jpg', img_raw)
        all_img_boxes.append(per_img_bboxes)
        print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, len(filenames),
                                                    time.time() - tic))

    return all_img_boxes
# load net
args = params()
use_cuda = args.cuda
# makedirs with exist_ok handles nested paths and an already-existing folder.
os.makedirs(args.save_folder, exist_ok=True)

net = S3FD(cfg.NUM_CLASSES, cfg.NumAnchor)
net.load_state_dict(torch.load(args.trained_model))
net.eval()

# detector = Detect(cfg)
detector = DetectIou(cfg)
anchors = PriorBox()
priors = anchors()

if use_cuda:
    net.cuda()
    # The attribute was misspelled `benckmark`, which set an unused name and
    # left cuDNN autotuning off.
    cudnn.benchmark = True
print('finish loading model')

if args.dataname == 'scut':
    dataset = VOCDetection(cfg.HEAD.DIR,
                           image_sets=[(args.dataset, 'test')],
                           target_transform=VOCAnnotationTransform(),
                           mode='test',
                           dataset_name='SCUT')
elif args.dataname == 'crowedhuman':
    dataset = ReadDataset(args.val_file, args.voc_root, train_mode='test')
else:
    # Fail with a clear message instead of a NameError on `dataset` below.
    raise ValueError('unsupported dataname: {}'.format(args.dataname))

test_net(net, detector, priors, dataset, use_cuda, args)
class TinySSD(nn.Module):
    """SSD-style detector with five prediction scales on an EfficientNet base.

    Scale 1 is the backbone feature map; scales 2-4 come from three
    conv/BN/ReLU/max-pool stages stacked on top of it; scale 5 is a further
    2x2 max-pool of scale 4. Every scale has one box-regression head
    (4 values per anchor) and one classification head (3 values per anchor).
    """

    def __init__(self, training=True):
        """Build the backbone, the prediction heads and the extra stages.

        Args:
            training: Initial value of ``self.training``; it selects which
                of the two output forms ``forward`` returns.
        """
        super(TinySSD, self).__init__()
        self.basenet = EfficientNet.from_name('efficientnet-b0')
        self.training = training

        # Anchors per location for scales 1..5.
        for idx, num_anchors in enumerate([4, 6, 6, 4, 4]):
            setattr(self, "predict_bbox_{}".format(idx + 1),
                    nn.Conv2d(320, num_anchors * 4, kernel_size=3, padding=1))
            # The factor 3 here is 2 + 1 (presumably two object classes plus
            # background -- TODO confirm).
            setattr(self, "predict_class_{}".format(idx + 1),
                    nn.Conv2d(320, 3 * num_anchors, kernel_size=3, padding=1))

        # Prior boxes are created lazily on the first inference call.
        self.priors = None

        # feature_2 .. feature_4: each stage halves the spatial resolution.
        for idx, k in enumerate([[320, 320], [320, 320], [320, 320]]):
            setattr(
                self, "feature_{}".format(idx + 2),
                nn.Sequential(
                    nn.Conv2d(k[0], k[1], kernel_size=3, padding=1),
                    nn.BatchNorm2d(k[1]), nn.ReLU(),
                    nn.Conv2d(k[1], k[1], kernel_size=3, padding=1),
                    nn.BatchNorm2d(k[1]), nn.ReLU(),
                    nn.MaxPool2d(2)))

    def _apply_heads(self, name_fmt, features, last_dim):
        """Run the per-scale heads and merge them to (batch, -1, last_dim)."""
        flattened = []
        for idx, feature in enumerate(features):
            head = getattr(self, name_fmt.format(idx + 1))
            # NCHW -> NHWC so that the values belonging to one anchor are
            # consecutive after flattening.
            out = head(feature).permute(0, 2, 3, 1).contiguous()
            flattened.append(out.view(out.size(0), -1))
        merged = torch.cat(flattened, 1)
        return merged.view(merged.size(0), -1, last_dim)

    def forward(self, x):
        """Predict class scores and box values for a batch of images.

        Args:
            x: Input batch passed to ``self.basenet.extract_features``.

        Returns:
            In training mode ``(confidences, locations)``: raw class logits
            of shape (batch, total_anchors, 3) and raw regression outputs of
            shape (batch, total_anchors, 4).
            Otherwise ``(confidences, boxes)``: softmax class probabilities
            and the result of ``convert_locations_to_boxes`` applied to the
            regression outputs and the prior boxes.
        """
        feature_1 = self.basenet.extract_features(x)
        feature_2 = self.feature_2(feature_1)
        feature_3 = self.feature_3(feature_2)
        feature_4 = self.feature_4(feature_3)
        feature_5 = F.max_pool2d(feature_4, kernel_size=2)
        features = (feature_1, feature_2, feature_3, feature_4, feature_5)

        # For each anchor centre, 4 consecutive values represent x, y, w, h.
        locations = self._apply_heads("predict_bbox_{}", features, 4)
        # For each anchor, 3 consecutive values are the class logits.
        confidences = self._apply_heads("predict_class_{}", features, 3)

        if self.training:
            return (confidences, locations)

        if self.priors is None:
            self.priors = PriorBox()()
        # Follow the device of the predictions instead of forcing CUDA, so
        # inference also works when the model runs on the CPU.
        self.priors = self.priors.to(locations.device)
        boxes = convert_locations_to_boxes(locations, self.priors, 0.1, 0.2)
        confidences = F.softmax(confidences, dim=2)
        return confidences, boxes
class RefineDet(nn.Module):
    """Single Shot Multibox Architecture (RefineDet variant).

    The network is composed of a base VGG network followed by the added
    multibox conv layers. Each multibox layer branches into
        1) conv2d for class conf scores
        2) conv2d for localization predictions
        3) associated priorbox layer to produce default bounding
           boxes specific to the layer's feature map size.
    See: RefineDet for more details.

    Args:
        base: VGG16 layers for input, size of either 320 or 512
        extras: extra layers that feed to multibox loc and conf layers
        ARM: "default box head", a pair (loc conv layers, conf conv layers)
        ODM: "multibox head", a pair (loc conv layers, conf conv layers)
        TCB: three layer lists converting the features from the ARM to the
            ODM for detection
        num_classes: number of ODM output classes
    """

    def __init__(self, base, extras, ARM, ODM, TCB, num_classes):
        super(RefineDet, self).__init__()
        self.num_classes = num_classes
        self.priorbox = PriorBox(cfgs.PriorBox_Cfg[str(cfgs.ImgSize)])
        # Priors are constants; build them without tracking gradients.
        with torch.no_grad():
            self.priors = self.priorbox.forward()
        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layers learn to scale the l2 normalized features from conv4_3
        # and conv5_3.
        self.conv4_3_L2Norm = L2Norm(512, 10)
        self.conv5_3_L2Norm = L2Norm(512, 8)
        self.extras = nn.ModuleList(extras)
        self.arm_loc = nn.ModuleList(ARM[0])
        self.arm_conf = nn.ModuleList(ARM[1])
        self.odm_loc = nn.ModuleList(ODM[0])
        self.odm_conf = nn.ModuleList(ODM[1])
        self.tcb0 = nn.ModuleList(TCB[0])
        self.tcb1 = nn.ModuleList(TCB[1])
        self.tcb2 = nn.ModuleList(TCB[2])

    def forward(self, x):
        """Applies network layers and ops on input image(s) x.

        Args:
            x: input image or batch of images, a 4-D tensor fed to the
                first VGG layer.

        Return:
            A 6-tuple:
                1: ARM localization, viewed as [batch, -1, 4]
                2: ARM confidence, viewed as [batch, -1, 2]
                3: ODM localization, viewed as [batch, -1, 4]
                4: ODM confidence, viewed as [batch, -1, num_classes]
                5: the prior boxes built in ``__init__``
                6: list of the per-source ODM confidence maps before
                   flattening (channels-last layout), kept for debugging
        """
        sources = list()
        tcb_source = list()
        arm_loc = list()
        arm_conf = list()
        odm_loc = list()
        odm_conf = list()

        # apply vgg up to conv4_3 relu and conv5_3 relu
        for k in range(30):
            x = self.vgg[k](x)
            if 22 == k:
                sources.append(self.conv4_3_L2Norm(x))
            elif 29 == k:
                sources.append(self.conv5_3_L2Norm(x))

        # apply vgg up to fc7
        for k in range(30, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # apply extra layers and cache source layer outputs
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        # apply ARM to source layers
        for (x, l, c) in zip(sources, self.arm_loc, self.arm_conf):
            arm_loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            arm_conf.append(c(x).permute(0, 2, 3, 1).contiguous())
        arm_loc = torch.cat([tmp.view(tmp.size(0), -1) for tmp in arm_loc], 1)
        arm_conf = torch.cat([tmp.view(tmp.size(0), -1) for tmp in arm_conf], 1)

        # calculate TCB features, walking from the deepest source to the
        # shallowest; the hard-coded 3 is the index of the last source, so
        # this assumes four source maps.
        p = None
        for k, v in enumerate(sources[::-1]):
            s = v
            for i in range(3):
                s = self.tcb0[(3 - k) * 3 + i](s)
            if k != 0:
                # merge the previous (deeper) TCB output after passing it
                # through the matching tcb1 layer
                u = self.tcb1[3 - k](p)
                s += u
            for i in range(3):
                s = self.tcb2[(3 - k) * 3 + i](s)
            p = s
            tcb_source.append(s)
        # restore shallow-to-deep order to line up with the ODM heads
        tcb_source.reverse()

        # apply ODM to the TCB features
        for (x, l, c) in zip(tcb_source, self.odm_loc, self.odm_conf):
            odm_loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            odm_conf.append(c(x).permute(0, 2, 3, 1).contiguous())
        # keep the un-flattened per-layer confidence maps for debugging
        odm_conf_debug = odm_conf
        odm_loc = torch.cat([tmp.view(tmp.size(0), -1) for tmp in odm_loc], 1)
        odm_conf = torch.cat([tmp.view(tmp.size(0), -1) for tmp in odm_conf], 1)

        output = (
            arm_loc.view(arm_loc.size(0), -1, 4),
            arm_conf.view(arm_conf.size(0), -1, 2),
            odm_loc.view(odm_loc.size(0), -1, 4),
            odm_conf.view(odm_conf.size(0), -1, self.num_classes),
            self.priors,
            odm_conf_debug
        )
        return output

    def load_weights(self, base_file):
        """Load a state dict from a ``.pth`` or ``.pkl`` file (non-strict).

        Args:
            base_file: Path of the checkpoint file. Files with any other
                extension are rejected with a printed message and nothing
                is loaded.
        """
        _, ext = os.path.splitext(base_file)
        # The former test `ext == '.pkl' or '.pth'` was always true because
        # the non-empty string '.pth' is truthy.
        if ext in ('.pkl', '.pth'):
            print('Loading weights into state dict...')
            self.load_state_dict(torch.load(base_file), strict=False)
            print('Finished!')
        else:
            print('Sorry only .pth and .pkl files supported.')
# Build the RetinaFace network in test phase and restore its weights.
net = load_model(RetinaFace(cfg=cfg, phase='test'), args.trained_model, args.cpu)
net.eval()
print('Finished loading model!')
print(net)

# Let cuDNN pick its algorithms; the original author notes this needs to be
# revisited if the input size changes.
cudnn.benchmark = True
if args.cpu:
    device = torch.device("cpu")
else:
    device = torch.device("cuda")
net = net.to(device)

resize = 1
num = 1

# testing begin
folder = '/home/siddhartha/Siddhartha/Reveal-Media/Reveal Videos/redaction/train/7'

# Priors are generated once for a fixed 720x1280 input.
priorbox = PriorBox(cfg, image_size=(720, 1280))
priors = priorbox.forward().to(device)
prior_data = priors.data

exception = []
for filename in os.listdir(folder):
    img_raw = cv2.imread(os.path.join(folder, filename))
    if img_raw is None:
        # cv2.imread returns None for entries it cannot decode; skip them.
        continue
    imgpth = os.path.join(folder, filename)
    A, B, _ = img_raw.shape  # rows, columns, channels
    print(A, B)
    img = np.float32(img_raw)
def main():
    """Train the detector, validating and checkpointing every 100 iterations.

    The network, optimizer, loss, and data loaders come from ``train_net``.
    Each epoch first updates the learning rate via ``poly_lr_scheduler`` and
    then runs one optimisation step per batch. Every 100 iterations the mean
    of the last 200 losses is logged; from iteration 100 onward ``val`` is
    also run, and the weights are saved whenever the validation regression
    loss or the validation classification loss drops below the stored value.
    Training ends after ``cfg.EPOCHES`` epochs or at the end of the first
    epoch in which ``cfg.MAX_STEPS`` iterations have been reached.
    """
    args = params()
    logger = createlogger(args.log_dir)
    net, optimizer, criterion, train_loader, val_loader = train_net(args)
    start_epoch = args.start_epoch
    iteration = 0
    net.train()

    # Sliding window over the most recent losses for smoothed logging.
    loss_hist = collections.deque(maxlen=200)
    loss_class_min = 10.0
    loss_reg_min = 10.0

    # Prior boxes are constants; build them once without tracking gradients.
    prior_box = PriorBox()
    with torch.no_grad():
        priors = prior_box()

    for epoch in range(start_epoch, cfg.EPOCHES):
        lr = poly_lr_scheduler(optimizer, args.lr, epoch, max_iter=cfg.EPOCHES)
        for images, targets in train_loader:
            if args.cuda:
                images = images.cuda()
                targets = [ann.cuda() for ann in targets]

            out = net(images)
            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, priors, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            loss_hist.append(float(loss.item()))

            if iteration % 100 == 0:
                logger.info(
                    'epoch:{} || iter:{} || tloss:{:.4f}, confloss:{:.4f}, locloss:{:.4f} || lr:{:.6f}'
                    .format(epoch, iteration, np.mean(loss_hist),
                            loss_c.item(), loss_l.item(), lr))

            if iteration != 0 and iteration % 100 == 0:
                # NOTE(review): if val() switches the network to eval mode
                # without restoring it, training continues in eval mode --
                # verify against val().
                tmpl, tmpc = val(args, net, val_loader, criterion, priors,
                                 logger)
                if tmpl < loss_reg_min or tmpc < loss_class_min:
                    loss_reg_min = tmpl
                    loss_class_min = tmpc
                    logger.info('Saving state, iter: %d' % iteration)
                    sfile = 'sfd_' + args.dataset + '_best.pth'
                    spath = os.path.join(args.save_folder, sfile)
                    if args.multigpu:
                        # Unwrap the parallel wrapper before saving.
                        torch.save(net.module.state_dict(), spath)
                    else:
                        torch.save(net.state_dict(), spath)
            iteration += 1

        # The counter is only checked between epochs, so it can step past
        # MAX_STEPS in the middle of one; `>=` still stops in that case,
        # whereas `==` would be skipped.
        if iteration >= cfg.MAX_STEPS:
            break