Example #1
def detect_face(net, img, device, scale=1., conf_thresh=0.3):
    # set input x
    if scale != 1:
        img = cv2.resize(img, None, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
    x = torch.from_numpy(img).permute(2, 0, 1).unsqueeze(0)
    _, _, height, width = x.shape
    if device.type == 'cuda':
        x = x.to(device)

    # forward pass
    loc, conf, iou = net(x)

    # get bounding boxes from PriorBox layer
    bbox_scale = torch.Tensor([width, height, width, height])
    priorbox = PriorBox(cfg, image_size=(height, width))
    priors = priorbox.forward()
    boxes = decode(loc.squeeze(0).data.cpu(), priors.data, cfg['variance'])
    boxes = boxes[:, :4] # omit landmarks
    boxes = boxes * bbox_scale / scale
    boxes = boxes.cpu().numpy()
    # get scores
    cls_scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    iou_scores = iou.squeeze(0).data.cpu().numpy()[:, 0]
    # clamp scores to [0, 1] for ONNX compatibility
    _idx = np.where(iou_scores < 0.)
    iou_scores[_idx] = 0.
    _idx = np.where(iou_scores > 1.)
    iou_scores[_idx] = 1.
    scores = np.sqrt(cls_scores * iou_scores)

    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    # ignore low scores
    keep_ind = np.where(dets[:, -1] > conf_thresh)[0]
    dets = dets[keep_ind, :]
    return dets
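The `decode` call above turns the regression offsets back into boxes using the center-form priors. For reference, a minimal sketch in the usual ssd.pytorch style, assuming a two-element `variances` list; the repository's own version may carry extra landmark columns, which Example #1 drops via `boxes[:, :4]`:

import torch

def decode(loc, priors, variances):
    # priors are center-form (cx, cy, w, h); loc holds the predicted offsets
    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
    boxes[:, :2] -= boxes[:, 2:] / 2   # (cx, cy) -> (xmin, ymin)
    boxes[:, 2:] += boxes[:, :2]       # (w, h)   -> (xmax, ymax)
    return boxes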
Example #2
    def forward(self, x):
        x = self.basenet.extract_features(x)
        feature_1 = x
        feature_2 = self.feature_2(x)
        feature_3 = self.feature_3(feature_2)
        feature_4 = self.feature_4(feature_3)
        feature_5 = F.max_pool2d(feature_4, kernel_size=2)
        '''
        (2,4*4,16,16)
        (2,4*6,8,8)
        (2,4*6,4,4),
        (2,4*4,2,2),
        (2,4*4,1,1)

        -> for each anchor center, every 4 consecutive values encode x, y, w, h
        '''
        confidences = []
        locations = []
        locations.append(
            self.predict_bbox_1(feature_1).permute(0, 2, 3, 1).contiguous())
        locations.append(
            self.predict_bbox_2(feature_2).permute(0, 2, 3, 1).contiguous())
        locations.append(
            self.predict_bbox_3(feature_3).permute(0, 2, 3, 1).contiguous())
        locations.append(
            self.predict_bbox_4(feature_4).permute(0, 2, 3, 1).contiguous())
        locations.append(
            self.predict_bbox_5(feature_5).permute(0, 2, 3, 1).contiguous())
        locations = torch.cat([o.view(o.size(0), -1) for o in locations],
                              1)  #(batch_size,total_anchor_num*4)
        locations = locations.view(locations.size(0), -1,
                                   4)  # (batch_size,total_anchor_num,4)

        confidences.append(
            self.predict_class_1(feature_1).permute(0, 2, 3, 1).contiguous())
        confidences.append(
            self.predict_class_2(feature_2).permute(0, 2, 3, 1).contiguous())
        confidences.append(
            self.predict_class_3(feature_3).permute(0, 2, 3, 1).contiguous())
        confidences.append(
            self.predict_class_4(feature_4).permute(0, 2, 3, 1).contiguous())
        confidences.append(
            self.predict_class_5(feature_5).permute(0, 2, 3, 1).contiguous())
        confidences = torch.cat([o.view(o.size(0), -1) for o in confidences],
                                1)  # (batch_size, total_anchor_num*3)
        confidences = confidences.view(confidences.size(0), -1,
                                       3)  # (batch_size, total_anchor_num, 3)
        if not self.training:
            if self.priors is None:
                self.priors = PriorBox()()
                self.priors = self.priors.cuda()
            boxes = convert_locations_to_boxes(locations, self.priors, 0.1,
                                               0.2)
            confidences = F.softmax(confidences, dim=2)
            return confidences, boxes
        else:
            #print(confidences.size(),locations.size())
            return (confidences, locations)  #  (2,1111,3) (2,1111,4)
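`convert_locations_to_boxes` applies the inverse of the SSD box encoding: center offsets are scaled by `center_variance` and sizes are exponentiated with `size_variance` (0.1 and 0.2 above). A sketch in the pytorch-ssd style, assuming center-form priors:

import torch

def convert_locations_to_boxes(locations, priors, center_variance, size_variance):
    # broadcast priors over the batch dimension if needed
    if priors.dim() + 1 == locations.dim():
        priors = priors.unsqueeze(0)
    return torch.cat([
        locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2],
        torch.exp(locations[..., 2:] * size_variance) * priors[..., 2:]
    ], dim=locations.dim() - 1)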
Example #3
 def __init__(self, pretrained=None):
     super(ScratchDet, self).__init__()
     self.pretrained_weight = pretrained
     self.root_res = SSDRES512(input_size=(512, 512), depth=101)
     self.loc_layers, self.conf_layers = pred_brach()
     self.priors = torch.Tensor(PriorBox().forward())
     self.init_weight(pretrained=self.pretrained_weight)
def main(args):
    torch.set_grad_enabled(False)

    device = torch.device(args.device)

    # Initialize the net and load the model
    print('Loading pretrained model from {}'.format(args.trained_model))
    net = YuFaceDetectNet(phase='test', size=None)
    net = load_model(net, args.trained_model)
    net.eval()
    if device.type == 'cuda':
        cudnn.benchmark = True
        net = net.to(device)
    print('Finished loading model!')

    # init data loader for WIDER Face
    print('Loading data for {}...'.format(args.widerface_split))
    widerface = WIDERFace(args.widerface_root, split=args.widerface_split)
    print('Finished loading data!')

    # start testing
    scales = []
    if args.multi_scale:
        scales = [0.25, 0.50, 0.75, 1.25, 1.50, 1.75, 2.0]
    print('Performing testing with scales: 1.0 + {}, conf_threshold: {}'.format(
        str(scales), args.confidence_threshold))
    priors_dict = {}
    for idx in tqdm(range(len(widerface))):
        img, event, name = widerface[idx]  # img_subpath = '0--Parade/XXX.jpg'
        if img.shape in priors_dict:
            priors = priors_dict[img.shape]
        else:
            height, width, _ = img.shape
            priors = PriorBox(cfg, image_size=(height, width)).forward()
            priors_dict[img.shape] = priors
        dets = detect_face(net, img, priors, device)
        available_scales = get_available_scales(img.shape[0], img.shape[1],
                                                scales)
        for available_scale in available_scales:
            det = detect_face(net, img, None, device, scale=available_scale)
            if det.shape[0] != 0:
                dets = np.row_stack((dets, det))
        # nms
        dets = nms_opencv(dets,
                          score_thresh=args.confidence_threshold,
                          nms_thresh=args.nms_threshold,
                          top_k=args.top_k,
                          keep_top_k=args.keep_top_k)
        save_res(dets, event, name)

    # widerface_eval
    print('Evaluating:')
    evaluation(args.res_dir,
               os.path.join(args.widerface_root, 'eval_tools/ground_truth'))
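Several examples build `PriorBox(cfg, image_size=(height, width))` per input shape, which is why `main` caches priors in `priors_dict`. A minimal RetinaFace-style sketch of such a PriorBox, assuming `cfg` carries 'min_sizes', 'steps', and 'clip'; the exact configuration keys differ between repositories:

from itertools import product
from math import ceil

import torch

class PriorBox(object):
    def __init__(self, cfg, image_size=None):
        self.min_sizes = cfg['min_sizes']  # e.g. [[16, 32], [64, 128], [256, 512]]
        self.steps = cfg['steps']          # e.g. [8, 16, 32]
        self.clip = cfg['clip']
        self.image_size = image_size       # (height, width)
        self.feature_maps = [[ceil(self.image_size[0] / step),
                              ceil(self.image_size[1] / step)]
                             for step in self.steps]

    def forward(self):
        anchors = []
        for k, f in enumerate(self.feature_maps):
            for i, j in product(range(f[0]), range(f[1])):
                for min_size in self.min_sizes[k]:
                    s_kx = min_size / self.image_size[1]
                    s_ky = min_size / self.image_size[0]
                    cx = (j + 0.5) * self.steps[k] / self.image_size[1]
                    cy = (i + 0.5) * self.steps[k] / self.image_size[0]
                    anchors.append([cx, cy, s_kx, s_ky])
        output = torch.Tensor(anchors).view(-1, 4)  # center form, normalized to [0, 1]
        if self.clip:
            output.clamp_(max=1, min=0)
        return output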
Example #5
 def __init__(self, base, extras, ARM, ODM, TCB, num_classes):
     super(RefineDet, self).__init__()
     self.num_classes = num_classes
     self.priorbox = PriorBox(cfgs.PriorBox_Cfg[str(cfgs.ImgSize)])
     with torch.no_grad():
         self.priors = self.priorbox.forward()
     # SSD network
     self.vgg = nn.ModuleList(base)
     # Layer learns to scale the l2 normalized features from conv4_3
     self.conv4_3_L2Norm = L2Norm(512, 10)
     self.conv5_3_L2Norm = L2Norm(512, 8)
     self.extras = nn.ModuleList(extras)
     self.arm_loc = nn.ModuleList(ARM[0])
     self.arm_conf = nn.ModuleList(ARM[1])
     self.odm_loc = nn.ModuleList(ODM[0])
     self.odm_conf = nn.ModuleList(ODM[1])
     #self.tcb = nn.ModuleList(TCB)
     self.tcb0 = nn.ModuleList(TCB[0])
     self.tcb1 = nn.ModuleList(TCB[1])
     self.tcb2 = nn.ModuleList(TCB[2])
 def __init__(self, fpn_filter_list, scale_list, num_classes):
     super(RefineDet, self).__init__()
     self.num_classes = num_classes
     self.priorbox = PriorBox(cfgs.PriorBox_Cfg_resnet[str(cfgs.ImgSize)])
     with torch.no_grad():
         self.priors = self.priorbox.forward()
     # SSD network
     inplanes = 2048
     planes = 512
     self.backone = resnet101(pretrained=True)
     self.res6 = resnet_layer5(inplanes, planes, 3, 2)
     self.FPN = FPN(fpn_filter_list)
     Arm_P3 = RPN_Pred(fpn_filter_list[0], scale_list[0], 2)
     Arm_P4 = RPN_Pred(fpn_filter_list[1], scale_list[1], 2)
     Arm_P5 = RPN_Pred(fpn_filter_list[2], scale_list[2], 2)
     Arm_P6 = RPN_Pred(fpn_filter_list[3], scale_list[3], 2)
     Odm_P3 = RPN_Pred(256, scale_list[0], num_classes)
     Odm_P4 = RPN_Pred(256, scale_list[1], num_classes)
     Odm_P5 = RPN_Pred(256, scale_list[2], num_classes)
     Odm_P6 = RPN_Pred(256, scale_list[3], num_classes)
     self.Arm_list = nn.ModuleList([Arm_P3, Arm_P4, Arm_P5, Arm_P6])
     self.Odm_list = nn.ModuleList([Odm_P3, Odm_P4, Odm_P5, Odm_P6])
def get_prediction(img, scale, im_height, im_width, print_messages=False):
    loc, conf = net(img)  # forward pass
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # ignore low scores
    inds = np.where(scores > 0.3)[0]
    boxes = boxes[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:5000]
    boxes = boxes[order]
    scores = scores[order]
    if print_messages:
        print('there are', len(boxes), 'candidates')
    return boxes, scores
Example #8
    def __init__(self, mode, backbone, size, num_classes, with_fpn=True):
        super(SSD, self).__init__()

        assert mode in ["test", "train"]
        assert backbone in ['mobilenetv3_large', 'mobilenetv3_small']

        self.mode = mode
        self.num_classes = num_classes
        self.cfg = (coco_config, voc_config)[num_classes == 21]
        self.priorbox = PriorBox(self.cfg)
        self.priors = self.priorbox.forward()
        self.size = size
        self.with_fpn = with_fpn
        # SSD network
        if self.with_fpn:
            self.basenet, self.topnet, self.conv_layers, self.fpn_layers, self.loc_layers, self.conf_layers =\
                self.build_ssd_with_fpn(backbone, self.size, self.num_classes)
        else:
            self.basenet, self.topnet, self.loc_layers, self.conf_layers =\
                self.build_ssd(backbone, self.size, self.num_classes)

        if mode == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
Example #9
    def forward(self, x, targets=None):
        sources = []
        confidences = []
        locations = []
        for i in range(23):
            x = self.vgg[i](x)
        s = self.l2_norm(x)  # Conv4_3 L2 normalization
        sources.append(s)

        # apply vgg up to fc7
        for i in range(23, len(self.vgg)):
            x = self.vgg[i](x)
        sources.append(x)

        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        for (x, l, c) in zip(sources, self.regression_headers,
                             self.classification_headers):
            locations.append(l(x).permute(0, 2, 3, 1).contiguous())
            confidences.append(c(x).permute(0, 2, 3, 1).contiguous())

        confidences = torch.cat([o.view(o.size(0), -1) for o in confidences],
                                1)
        locations = torch.cat([o.view(o.size(0), -1) for o in locations], 1)

        confidences = confidences.view(confidences.size(0), -1,
                                       self.num_classes)
        locations = locations.view(locations.size(0), -1, 4)

        if not self.training:
            # when evaluating, decode predictions
            if self.priors is None:
                self.priors = PriorBox()().to(locations.device)
            confidences = F.softmax(confidences, dim=2)
            boxes = box_utils.convert_locations_to_boxes(
                locations, self.priors, 0.1, 0.2)
            boxes = box_utils.center_form_to_corner_form(boxes)
            print("testing !")
            return confidences, boxes
        else:
            return (confidences, locations)
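`center_form_to_corner_form` (also used by the dataset class in Example #11 below) is a one-liner in most of these codebases; a sketch:

import torch

def center_form_to_corner_form(boxes):
    # (cx, cy, w, h) -> (xmin, ymin, xmax, ymax)
    return torch.cat([boxes[..., :2] - boxes[..., 2:] / 2,
                      boxes[..., :2] + boxes[..., 2:] / 2],
                     dim=boxes.dim() - 1)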
Example #10
    def __init__(self, args):
        if args.ctx and torch.cuda.is_available():
            self.use_cuda = True
        else:
            self.use_cuda = False
        if self.use_cuda:
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')
        self.loadmodel(args.modelpath)
        self.threshold = args.threshold
        self.img_dir = args.img_dir

        self.detect = Detect(cfg)
        # self.detect = DetectIou(cfg)
        # self.detect = Detect_demo(cfg)
        self.Prior = PriorBox()
        with torch.no_grad():
            self.priors = self.Prior()
        self.num_classes = cfg.NUM_CLASSES
Example #11
 def __init__(self,
              img_path="./dataset",
              transform=None,
              center_variance=0.1,
              size_variance=0.2):
     self.center_variance = center_variance
     self.size_variance = size_variance
     self.img_paths = glob.glob(img_path + "/images/*.jpg")
     self.labels = [
         label.replace(".jpg", ".xml").replace("images", "labels")
         for label in self.img_paths
     ]
     self.class_names = ("__background__", "basketball", "volleyball")
     prior = PriorBox()
     self.center_form_priors = prior()  # center form
     self.imgW, self.imgH = 512, 512
     self.corner_form_priors = center_form_to_corner_form(
         self.center_form_priors)
     #print(self.center_form_priors.size(),self.corner_form_priors.size())
     self.transform = transform
Example #12
    def __init__(self, config):
        super(SSAD, self).__init__()
        self.num_classes = config.num_classes
        self.num_anchors = config.num_anchors
        self.input_feature_dim = config.feature_dim
        self.prediction_output = self.num_anchors * (self.num_classes + 3)
        self.best_loss = 10000000
        self.prior_box = PriorBox(config)
        # Base Layers
        self.base_layers = nn.Sequential(OrderedDict([
            ('conv1d_1',
             nn.Conv1d(in_channels=self.input_feature_dim, out_channels=512, kernel_size=9, stride=1, padding=4)),
            ('relu_1', nn.ReLU()),
            ('maxpooling1d_1', nn.MaxPool1d(kernel_size=4, stride=2, padding=1)),
            ('conv1d_2', nn.Conv1d(in_channels=512, out_channels=512, kernel_size=9, stride=1, padding=4)),
            ('relu_2', nn.ReLU()),
            ('maxpooling1d_2', nn.MaxPool1d(kernel_size=4, stride=2, padding=1))
        ]))

        # Anchor Layers
        self.anchor_layer1 = nn.Sequential(
            nn.Conv1d(in_channels=512, out_channels=1024, kernel_size=3, stride=2, padding=1),
            nn.ReLU())
        self.anchor_layer2 = nn.Sequential(
            nn.Conv1d(in_channels=1024, out_channels=1024, kernel_size=3, stride=2, padding=1),
            nn.ReLU())
        self.anchor_layer3 = nn.Sequential(
            nn.Conv1d(in_channels=1024, out_channels=1024, kernel_size=3, stride=2, padding=1),
            nn.ReLU())

        # Prediction Layers
        self.prediction_layer1 = nn.Conv1d(in_channels=1024, out_channels=self.prediction_output, kernel_size=3,
                                           stride=1, padding=1)
        self.prediction_layer2 = nn.Conv1d(in_channels=1024, out_channels=self.prediction_output, kernel_size=3,
                                           stride=1, padding=1)
        self.prediction_layer3 = nn.Conv1d(in_channels=1024, out_channels=self.prediction_output, kernel_size=3,
                                           stride=1, padding=1)

        self.reset_params()
def create_model():
    '''
    '''
    ASPECT_RATIOS = [[2, 3], [2, 3], [2, 3], [2, 3], [2], [2]]
    IMAGE_SIZE = [300, 300]
    FEATURE_LAYER = [[22, 34, 'S', 'S', '', ''],
                     [512, 1024, 512, 256, 256, 256]]
    NUM_CLASSES = 21
    SIZES = [0.2, 0.95]
    STEPS = []
    CLIP = True
    #
    base = networks_map['resnet_50']
    number_box = [
        2 * len(aspect_ratios)
        if isinstance(aspect_ratios[0], int) else len(aspect_ratios)
        for aspect_ratios in ASPECT_RATIOS
    ]

    model = ssds_map['rfb'](base=base,
                            feature_layer=FEATURE_LAYER,
                            mbox=number_box,
                            num_classes=NUM_CLASSES)
    #
    print(model)
    feature_maps = _forward_features_size(model, IMAGE_SIZE)
    print('==>Feature map size:')
    print(feature_maps)
    #
    priorbox = PriorBox(image_size=IMAGE_SIZE,
                        feature_maps=feature_maps,
                        aspect_ratios=ASPECT_RATIOS,
                        scale=SIZES,
                        archor_stride=STEPS,
                        clip=CLIP)
    # priors = Variable(priorbox.forward(), volatile=True)

    return model, priorbox
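`_forward_features_size` probes the model with a dummy input so the feature map sizes fed to `PriorBox` match the network instead of being hard-coded. A sketch, assuming the ssds.pytorch convention of a `phase='feature'` forward mode that returns the source feature maps; the real helper may differ:

import torch

def _forward_features_size(model, img_size):
    model.eval()
    x = torch.rand(1, 3, img_size[0], img_size[1])
    with torch.no_grad():
        feature_maps = model(x, phase='feature')  # list of source feature maps
    return [(fm.size(2), fm.size(3)) for fm in feature_maps]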
Example #14
    # net = MobileNetV1(num_classes=num_classes)
    net = RetinaFace(cfg=cfg).to(device)
    params = [p for p in net.parameters() if p.requires_grad]

    optimizer = torch.optim.SGD(params,
                                lr=LR,
                                momentum=0.9,
                                weight_decay=0.0005)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=10,
                                                   gamma=0.1)

    # criterion = nn.CrossEntropyLoss().to(device)
    criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)

    priorbox = PriorBox(cfg, image_size=(IMG_DIM, IMG_DIM))
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.to(device)

    for epoch in range(MAX_EPOCH):
        lr = lr_scheduler.get_lr()[-1]
        t1 = time.time()
        time_dict = dict()
        for i, data in enumerate(train_loader):
            t2 = time.time()
            time_dict['iterdat'] = t2 - t1
            print(time_dict['iterdat'])
            continue  # profiling only: skips the training step to time data loading

            images, targets = data  # B C H W
class RefineDet(nn.Module):
    """Single Shot Multibox Architecture
    The network is composed of a base VGG network followed by the
    added multibox conv layers.  Each multibox layer branches into
        1) conv2d for class conf scores
        2) conv2d for localization predictions
        3) associated priorbox layer to produce default bounding
           boxes specific to the layer's feature map size.
    See: RefineDet for more details.
    Args:
        size: input image size
        base: VGG16 layers for input, size of either 320 or 512
        extras: extra layers that feed to multibox loc and conf layers
        ARM: "default box head" consists of loc and conf conv layers
        ODM: "multibox head" consists of loc and conf conv layers
        TCB: converting the features from the ARM to the ODM for detection
        num_classes: number of ODM output classes
    """
    def __init__(self, fpn_filter_list, scale_list, num_classes):
        super(RefineDet, self).__init__()
        self.num_classes = num_classes
        self.priorbox = PriorBox(cfgs.PriorBox_Cfg_resnet[str(cfgs.ImgSize)])
        with torch.no_grad():
            self.priors = self.priorbox.forward()
        # SSD network
        inplanes = 2048
        planes = 512
        self.backone = resnet101(pretrained=True)
        self.res6 = resnet_layer5(inplanes, planes, 3, 2)
        self.FPN = FPN(fpn_filter_list)
        Arm_P3 = RPN_Pred(fpn_filter_list[0], scale_list[0], 2)
        Arm_P4 = RPN_Pred(fpn_filter_list[1], scale_list[1], 2)
        Arm_P5 = RPN_Pred(fpn_filter_list[2], scale_list[2], 2)
        Arm_P6 = RPN_Pred(fpn_filter_list[3], scale_list[3], 2)
        Odm_P3 = RPN_Pred(256, scale_list[0], num_classes)
        Odm_P4 = RPN_Pred(256, scale_list[1], num_classes)
        Odm_P5 = RPN_Pred(256, scale_list[2], num_classes)
        Odm_P6 = RPN_Pred(256, scale_list[3], num_classes)
        self.Arm_list = nn.ModuleList([Arm_P3, Arm_P4, Arm_P5, Arm_P6])
        self.Odm_list = nn.ModuleList([Odm_P3, Odm_P4, Odm_P5, Odm_P6])

    def forward(self, x):
        """Applies network layers and ops on input image(s) x.
        Args:
            x: input image or batch of images. Shape: [batch,3,w,h].
        Return:
            list of concat outputs from:
                1: confidence layers, Shape: [batch*num_priors,num_classes]
                2: localization layers, Shape: [batch,num_priors*4]
                3: priorbox layers, Shape: [2,num_priors*4]
        """
        sources = list()
        arm_loc = list()
        arm_conf = list()
        odm_loc = list()
        odm_conf = list()
        tcb_source = list()
        odm_conf_map = list()
        # apply vgg up to conv4_3 relu and conv5_3 relu
        c3, c4, c5, x = self.backone(x)
        c6 = self.res6(x)
        sources = [c3, c4, c5, c6]
        # apply ARM  to source layers
        for (x, arm_pred) in zip(sources, self.Arm_list):
            loc_p, conf_p = arm_pred(x)  # run each head once instead of twice
            arm_loc.append(loc_p.permute(0, 2, 3, 1).contiguous())
            arm_conf.append(conf_p.permute(0, 2, 3, 1).contiguous())
        arm_loc = torch.cat([tmp.view(tmp.size(0), -1) for tmp in arm_loc], 1)
        arm_conf = torch.cat([tmp.view(tmp.size(0), -1) for tmp in arm_conf],
                             1)
        #apply tcb
        p3, p4, p5, p6 = self.FPN(c3, c4, c5, c6)
        tcb_source = [p3, p4, p5, p6]
        # apply ODM to source layers
        for (x, odm_pred) in zip(tcb_source, self.Odm_list):
            loc_p, conf_p = odm_pred(x)  # run each head once instead of twice
            odm_loc.append(loc_p.permute(0, 2, 3, 1).contiguous())
            odm_conf.append(conf_p.permute(0, 2, 3, 1).contiguous())
        odm_conf_map = odm_conf
        odm_loc = torch.cat([tmp.view(tmp.size(0), -1) for tmp in odm_loc], 1)
        odm_conf = torch.cat([tmp.view(tmp.size(0), -1) for tmp in odm_conf],
                             1)
        #print(arm_loc.size(), arm_conf.size(), odm_loc.size(), odm_conf.size())
        output = (arm_loc.view(arm_loc.size(0), -1,
                               4), arm_conf.view(arm_conf.size(0), -1, 2),
                  odm_loc.view(odm_loc.size(0), -1, 4),
                  odm_conf.view(odm_conf.size(0), -1,
                                self.num_classes), self.priors, odm_conf_map)
        return output

    def load_weights(self, base_file):
        other, ext = os.path.splitext(base_file)
        #device = torch.device('cpu')
        if ext in ('.pkl', '.pth'):
            print('Loading weights into state dict...')
            self.load_state_dict(torch.load(base_file), strict=False)
            print('Finished!')
        else:
            print('Sorry only .pth and .pkl files supported.')
Example #16
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)

    scale = torch.Tensor([
        im_width, im_height, im_width, im_height, im_width, im_height,
        im_width, im_height, im_width, im_height, im_width, im_height,
        im_width, im_height
    ])
    scale = scale.to(device)

    _t['forward_pass'].tic()
    loc, conf = net(img)  # forward pass
    _t['forward_pass'].toc()
    _t['misc'].tic()

    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # ignore low scores
    inds = np.where(scores > args.confidence_threshold)[0]
    boxes = boxes[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:args.top_k]
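The snippet stops just before suppression; the pure-NumPy NMS that typically follows the top-K step is the classic py_cpu_nms, sketched here:

import numpy as np

def py_cpu_nms(dets, thresh):
    # dets: (N, 5) array of [xmin, ymin, xmax, ymax, score]
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the top-scoring box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # keep only boxes whose IoU with the top box is below the threshold
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep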
Example #17
class SSD(nn.Module):
    """ ssd model implementation
    Inputs:
        mode: train or test
        backbone: backbone for base network, 'mobilenetv3_large' or 'mobilenetv3_small'
        size: image size
        num_classes: number of object classes 
    """
    def __init__(self, mode, backbone, size, num_classes, with_fpn=True):
        super(SSD, self).__init__()

        assert mode in ["test", "train"]
        assert backbone in ['mobilenetv3_large', 'mobilenetv3_small']

        self.mode = mode
        self.num_classes = num_classes
        self.cfg = (coco_config, voc_config)[num_classes == 21]
        self.priorbox = PriorBox(self.cfg)
        self.priors = self.priorbox.forward()
        self.size = size
        self.with_fpn = with_fpn
        # SSD network
        if self.with_fpn:
            self.basenet, self.topnet, self.conv_layers, self.fpn_layers, self.loc_layers, self.conf_layers =\
                self.build_ssd_with_fpn(backbone, self.size, self.num_classes)
        else:
            self.basenet, self.topnet, self.loc_layers, self.conf_layers =\
                self.build_ssd(backbone, self.size, self.num_classes)

        if mode == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def forward(self, x):
        """Applies network layers and ops on input image(s) x.

        Args:
            x: input image or batch of images. Shape: [batch,3,256,256].

        Return:
            Depending on phase:
            test:
                Variable(tensor) of output class label predictions,
                confidence score, and corresponding location predictions for
                each object detected. Shape: [batch,topk,7]

            train:
                list of concat outputs from:
                    1: confidence layers, Shape: [batch*num_priors,num_classes]
                    2: localization layers, Shape: [batch,num_priors*4]
                    3: priorbox layers, Shape: [2,num_priors*4]
        """

        feature_inputs = []
        loc_reg_output = []  # predict box regression of specific layer
        classify_output = []  # confidence of classification of specific layer
        # indices of base layers that feed the box regression and confidence heads
        for num, layer in enumerate(self.basenet):
            if num in self.cfg['net_source']:
                feature_inputs.append(layer.conv._modules['0'](x))
            x = layer(x)
        for num, layer in enumerate(self.topnet):
            x = layer._modules['0'](x)
            x = layer._modules['1'](x)
            feature_inputs.append(x)
            x = layer._modules['2'](x)

        # FPN
        if self.with_fpn:
            for idx in range(len(feature_inputs) - 1, -1, -1):
                if idx == len(feature_inputs) - 1:
                    x = self.conv_layers[idx](feature_inputs[idx])
                    p = nn.functional.interpolate(x, scale_factor=2)
                    feature_inputs[idx] = x
                elif idx == 0:
                    x = self.conv_layers[0](feature_inputs[0])
                    x += p
                    feature_inputs[0] = self.fpn_layers[0](x)
                else:
                    x = self.conv_layers[idx](feature_inputs[idx])
                    x += p
                    p = nn.functional.interpolate(x, scale_factor=2)
                    if idx <= 3:
                        feature_inputs[idx] = self.fpn_layers[idx](x)
                    else:
                        feature_inputs[idx] = x

        for (x, loc_layer, conf_layer) in zip(feature_inputs, self.loc_layers,
                                              self.conf_layers):
            loc_reg_output.append(
                loc_layer(x).permute(0, 2, 3, 1).contiguous())
            classify_output.append(
                conf_layer(x).permute(0, 2, 3, 1).contiguous())

        loc_reg_output = torch.cat(
            [loc.view(loc.shape[0], -1) for loc in loc_reg_output], dim=1)
        loc_reg_output = loc_reg_output.view(loc_reg_output.shape[0], -1, 4)
        classify_output = torch.cat(
            [conf.view(conf.shape[0], -1) for conf in classify_output], dim=1)
        if self.mode == 'test':
            output = self.detect(
                loc_reg_output,
                self.softmax(
                    classify_output.view(classify_output.shape[0], -1,
                                         self.num_classes)), self.priors)
        else:
            output = (loc_reg_output,
                      classify_output.view(classify_output.shape[0], -1,
                                           self.num_classes), self.priors)

        return output

    def load_weights(self, base_file):
        other, ext = os.path.splitext(base_file)
        if ext in ('.pkl', '.pth'):
            print('Loading weights into state dict...')
            self.load_state_dict(
                torch.load(base_file,
                           map_location=lambda storage, loc: storage))
            print('Finished!')
        else:
            print('Sorry only .pth and .pkl files supported.')

    def build_ssd_with_fpn(self, backbone, size, num_classes):
        conv_layers = []
        fpn_layers = []
        extra_layers = []
        loc_layers = []
        conf_layers = []
        mobile_layers = []

        # build backbone network
        if backbone == 'mobilenetv3_small':
            base_model = mobilenetv3_small(num_classes=num_classes,
                                           include_top=False)
            mobile_layers += base_model.get_layers()
        else:
            base_model = mobilenetv3_large(num_classes=num_classes,
                                           include_top=False)
            mobile_layers += base_model.get_layers()

        # build extras network on the top of the backbone
        in_channels = 96
        for k, v in enumerate(self.cfg['extras'][str(size)]):
            extra_layers.append(
                nn.Sequential(
                    nn.Conv2d(in_channels, v, kernel_size=1, stride=1),
                    nn.Conv2d(v,
                              v,
                              kernel_size=3,
                              stride=2,
                              padding=1,
                              groups=v),
                    nn.Conv2d(v, v * 2, kernel_size=1, stride=1)))
            in_channels = v * 2

        # build fpn and classify/regression layers
        mbox = self.cfg['mbox'][str(size)]
        for k, v in enumerate(self.cfg['net_source']):
            conv_layers += [
                nn.Conv2d(mobile_layers[v].conv._modules['0'].out_channels,
                          self.cfg['TOP_DOWN_PYRAMID_SIZE'],
                          kernel_size=1)
            ]
            fpn_layers += [
                nn.Conv2d(self.cfg['TOP_DOWN_PYRAMID_SIZE'],
                          self.cfg['TOP_DOWN_PYRAMID_SIZE'],
                          kernel_size=3,
                          padding=1)
            ]
            loc_layers += [
                nn.Conv2d(self.cfg['TOP_DOWN_PYRAMID_SIZE'],
                          mbox[k] * 4,
                          kernel_size=3,
                          padding=1)
            ]
            conf_layers += [
                nn.Conv2d(self.cfg['TOP_DOWN_PYRAMID_SIZE'],
                          mbox[k] * num_classes,
                          kernel_size=3,
                          padding=1)
            ]
        for k, v in enumerate(extra_layers, 4):
            conv_layers += [
                nn.Conv2d(v._modules['1'].out_channels,
                          self.cfg['TOP_DOWN_PYRAMID_SIZE'],
                          kernel_size=1)
            ]
            loc_layers += [
                nn.Conv2d(self.cfg['TOP_DOWN_PYRAMID_SIZE'],
                          mbox[k] * 4,
                          kernel_size=3,
                          padding=1)
            ]
            conf_layers += [
                nn.Conv2d(self.cfg['TOP_DOWN_PYRAMID_SIZE'],
                          mbox[k] * num_classes,
                          kernel_size=3,
                          padding=1)
            ]
        return nn.ModuleList(mobile_layers), nn.ModuleList(extra_layers), \
            nn.ModuleList(conv_layers), nn.ModuleList(fpn_layers), \
            nn.ModuleList(loc_layers), nn.ModuleList(conf_layers)

    def build_ssd(self, backbone, size, num_classes):
        mobile_layers = []
        extra_layers = []
        loc_layers = []
        conf_layers = []

        # build backbone network
        if backbone == 'mobilenetv3_small':
            base_model = mobilenetv3_small(num_classes=num_classes,
                                           include_top=False)
            mobile_layers += base_model.get_layers()
        else:
            base_model = mobilenetv3_large(num_classes=num_classes,
                                           include_top=False)
            mobile_layers += base_model.get_layers()

        # build extras network on the top of the backbone
        in_channels = 96
        for k, v in enumerate(self.cfg['extras'][str(size)]):
            extra_layers.append(
                nn.Sequential(
                    nn.Conv2d(in_channels, v, kernel_size=1, stride=1),
                    nn.Conv2d(v,
                              v,
                              kernel_size=3,
                              stride=2,
                              padding=1,
                              groups=v),
                    nn.Conv2d(v, v * 2, kernel_size=1, stride=1)))
            in_channels = v * 2

        # build fpn and classify/regression layers
        mbox = self.cfg['mbox'][str(size)]
        for k, v in enumerate(self.cfg['net_source']):
            loc_layers += [
                nn.Conv2d(mobile_layers[v].conv._modules['0'].out_channels,
                          mbox[k] * 4,
                          kernel_size=3,
                          padding=1)
            ]
            conf_layers += [
                nn.Conv2d(mobile_layers[v].conv._modules['0'].out_channels,
                          mbox[k] * num_classes,
                          kernel_size=3,
                          padding=1)
            ]
        for k, v in enumerate(extra_layers, 4):
            loc_layers += [
                nn.Conv2d(v._modules['1'].out_channels,
                          mbox[k] * 4,
                          kernel_size=3,
                          padding=1)
            ]
            conf_layers += [
                nn.Conv2d(v._modules['1'].out_channels,
                          mbox[k] * num_classes,
                          kernel_size=3,
                          padding=1)
            ]
        return nn.ModuleList(mobile_layers), nn.ModuleList(extra_layers), \
            nn.ModuleList(loc_layers), nn.ModuleList(conf_layers)

    def to_cuda(self):
        self.priors = self.priors.cuda()
        self.cuda()
        return self
Example #18
if len(gpu_ids) > 1:
    net = torch.nn.DataParallel(net, device_ids=gpu_ids)

#device = torch.device(args.device)
device = torch.device('cuda:' + str(gpu_ids[0]))
cudnn.benchmark = True
net = net.to(device)

optimizer = optim.SGD(net.parameters(),
                      lr=initial_lr,
                      momentum=momentum,
                      weight_decay=weight_decay)
criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 3, 0.35, False,
                         False)

priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
with torch.no_grad():
    priors = priorbox.forward()
    priors = priors.to(device)


def train():

    net.train()

    #load the two dataset for face rectangles and landmarks respectively
    print('Loading Dataset...')
    dataset_rect = FaceRectLMDataset(training_face_rect_dir, img_dim, rgb_mean)
    dataset_landmark = FaceRectLMDataset(training_face_landmark_dir, img_dim,
                                         rgb_mean)
    mbox_loc = Reshape((num_boxes, 4), name='mbox_loc_final')(mbox_loc)
    mbox_conf = Reshape((num_boxes, num_classes),
                        name='mbox_conf_logits')(mbox_conf)
    mbox_conf = Activation('softmax', name='mbox_conf_final')(mbox_conf)
    predictions = concatenate([mbox_loc, mbox_conf],
                              axis=2,
                              name='predictions')

    model = Model(inputs=input_layer, outputs=predictions)

    if weights_path is not None:
        model.load_weights(weights_path, by_name=True)

    if frozen_layers is not None:
        for layer in model.layers:
            if layer.name in frozen_layers:
                layer.trainable = False

    return model


if __name__ == "__main__":
    from prior_box import PriorBox
    model = SSD300()
    print(model.output_shape)
    prior_box = PriorBox()
    prior_boxes = prior_box.forward()
    prior_boxes = prior_boxes.numpy()
    print(prior_boxes.shape)
    model.summary()
Example #20
def test_net(save_folder, annopath, net, im_size=300, thresh=0.05):

    torch.set_grad_enabled(False)

    df = pd.read_csv(annopath)
    filenames = df['filename'].unique()

    all_img_boxes = []

    filenames = ['/root/face_mask_lmks_detection/test_images/test.jpg']
    resize = 1
    # testing begin
    for i, image_path in enumerate(filenames):
        img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        img = np.float32(img_raw)
        im_height, im_width, _ = img.shape

        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1],
             img.shape[0]])  # w h w h
        img -= (104, 117, 123)

        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.cuda()
        prior_data = priors.data

        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.cuda()
        scale = scale.cuda()

        tic = time.time()
        loc, conf = net(img)  # forward pass

        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()

        # remove batch dim, as test only for single img
        scores = F.softmax(conf, dim=-1).squeeze(
            0).data.cpu().numpy()[:, 1:]  # conf : batch, num anchors, 3
        # take the max class score (and its label) for each anchor
        labels = np.argmax(scores, axis=-1)
        scores = np.max(scores, axis=-1)  # scores : number anchors,

        if len(scores) == 0:
            # todo
            pass

        keep_idx = single_class_non_max_suppression(boxes, scores, 0.6, 0.5)

        per_img_bboxes = []

        for idx in keep_idx:
            conf = float(scores[idx])
            class_id = labels[idx]
            bbox = boxes[idx]

            text = "{:.4f}".format(conf)

            # clip the coordinate, avoid the value exceed the image boundary.
            xmin = max(0, int(bbox[0]))
            ymin = max(0, int(bbox[1]))
            xmax = min(int(bbox[2]), im_width)
            ymax = min(int(bbox[3]), im_height)

            per_img_bboxes.append([xmin, ymin, xmax, ymax, conf, class_id])

            if int(class_id) == 1:
                color = (0, 255, 0)
            else:
                color = (0, 0, 255)

            cv2.rectangle(img_raw, (xmin, ymin), (xmax, ymax), color, 2)

            cv2.putText(img_raw, text, (xmin, ymin + 12),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

        cv2.imwrite('./result.jpg', img_raw)

        all_img_boxes.append(per_img_bboxes)

        print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, len(filenames),
                                                    time.time() - tic))
Example #21
    # load net
    args = params()
    use_cuda = args.cuda
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)
    # use_cuda = torch.cuda.is_available()
    # if use_cuda:
    #     torch.set_default_tensor_type('torch.cuda.FloatTensor')
    # else:
    #     torch.set_default_tensor_type('torch.FloatTensor')
    net = S3FD(cfg.NUM_CLASSES, cfg.NumAnchor)
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    # detector = Detect(cfg)
    detector = DetectIou(cfg)
    anchors = PriorBox()
    priors = anchors()
    if use_cuda:
        net.cuda()
        cudnn.benchmark = True
    print('finish loading model')
    if args.dataname == 'scut':
        dataset = VOCDetection(cfg.HEAD.DIR,
                               image_sets=[(args.dataset, 'test')],
                               target_transform=VOCAnnotationTransform(),
                               mode='test',
                               dataset_name='SCUT')
    elif args.dataname == 'crowedhuman':
        dataset = ReadDataset(args.val_file, args.voc_root, train_mode='test')
    test_net(net, detector, priors, dataset, use_cuda, args)
Example #22
class TinySSD(nn.Module):
    def __init__(self, training=True):
        super(TinySSD, self).__init__()
        self.basenet = EfficientNet.from_name('efficientnet-b0')
        self.training = training
        for idx, num_anchors in enumerate([4, 6, 6, 4, 4]):
            setattr(self, "predict_bbox_{}".format(idx + 1),
                    nn.Conv2d(320, num_anchors * 4, kernel_size=3, padding=1))
            setattr(
                self,
                "predict_class_{}".format(idx + 1),
                nn.Conv2d(  # 3 outputs per anchor: 2 classes + 1 background
                    320,
                    3 * num_anchors,
                    kernel_size=3,
                    padding=1))
        self.priors = None
        for idx, k in enumerate([[320, 320], [320, 320], [320, 320]]):
            setattr(
                self, "feature_{}".format(idx + 2),
                nn.Sequential(nn.Conv2d(k[0], k[1], kernel_size=3, padding=1),
                              nn.BatchNorm2d(k[1]), nn.ReLU(),
                              nn.Conv2d(k[1], k[1], kernel_size=3, padding=1),
                              nn.BatchNorm2d(k[1]), nn.ReLU(),
                              nn.MaxPool2d(2)))

    def forward(self, x):
        x = self.basenet.extract_features(x)
        feature_1 = x
        feature_2 = self.feature_2(x)
        feature_3 = self.feature_3(feature_2)
        feature_4 = self.feature_4(feature_3)
        feature_5 = F.max_pool2d(feature_4, kernel_size=2)
        '''
        (2,4*4,16,16)
        (2,4*6,8,8)
        (2,4*6,4,4),
        (2,4*4,2,2),
        (2,4*4,1,1)

        -> for each anchor center, every 4 consecutive values encode x, y, w, h
        '''
        confidences = []
        locations = []
        locations.append(
            self.predict_bbox_1(feature_1).permute(0, 2, 3, 1).contiguous())
        locations.append(
            self.predict_bbox_2(feature_2).permute(0, 2, 3, 1).contiguous())
        locations.append(
            self.predict_bbox_3(feature_3).permute(0, 2, 3, 1).contiguous())
        locations.append(
            self.predict_bbox_4(feature_4).permute(0, 2, 3, 1).contiguous())
        locations.append(
            self.predict_bbox_5(feature_5).permute(0, 2, 3, 1).contiguous())
        locations = torch.cat([o.view(o.size(0), -1) for o in locations],
                              1)  #(batch_size,total_anchor_num*4)
        locations = locations.view(locations.size(0), -1,
                                   4)  # (batch_size,total_anchor_num,4)

        confidences.append(
            self.predict_class_1(feature_1).permute(0, 2, 3, 1).contiguous())
        confidences.append(
            self.predict_class_2(feature_2).permute(0, 2, 3, 1).contiguous())
        confidences.append(
            self.predict_class_3(feature_3).permute(0, 2, 3, 1).contiguous())
        confidences.append(
            self.predict_class_4(feature_4).permute(0, 2, 3, 1).contiguous())
        confidences.append(
            self.predict_class_5(feature_5).permute(0, 2, 3, 1).contiguous())
        confidences = torch.cat([o.view(o.size(0), -1) for o in confidences],
                                1)  # (batch_size, total_anchor_num*3)
        confidences = confidences.view(confidences.size(0), -1,
                                       3)  # (batch_size, total_anchor_num, 3)
        if not self.training:
            if self.priors is None:
                self.priors = PriorBox()()
                self.priors = self.priors.cuda()
            boxes = convert_locations_to_boxes(locations, self.priors, 0.1,
                                               0.2)
            confidences = F.softmax(confidences, dim=2)
            return confidences, boxes
        else:
            #print(confidences.size(),locations.size())
            return (confidences, locations)  #  (2,1111,3) (2,1111,4)
Example #23
class RefineDet(nn.Module):
    """Single Shot Multibox Architecture
    The network is composed of a base VGG network followed by the
    added multibox conv layers.  Each multibox layer branches into
        1) conv2d for class conf scores
        2) conv2d for localization predictions
        3) associated priorbox layer to produce default bounding
           boxes specific to the layer's feature map size.
    See: RefineDet for more details.
    Args:
        size: input image size
        base: VGG16 layers for input, size of either 320 or 512
        extras: extra layers that feed to multibox loc and conf layers
        ARM: "default box head" consists of loc and conf conv layers
        ODM: "multibox head" consists of loc and conf conv layers
        TCB: converting the features from the ARM to the ODM for detection
        num_classes: number of ODM output classes
    """
    def __init__(self, base, extras, ARM, ODM, TCB, num_classes):
        super(RefineDet, self).__init__()
        self.num_classes = num_classes
        self.priorbox = PriorBox(cfgs.PriorBox_Cfg[str(cfgs.ImgSize)])
        with torch.no_grad():
            self.priors = self.priorbox.forward()
        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.conv4_3_L2Norm = L2Norm(512, 10)
        self.conv5_3_L2Norm = L2Norm(512, 8)
        self.extras = nn.ModuleList(extras)
        self.arm_loc = nn.ModuleList(ARM[0])
        self.arm_conf = nn.ModuleList(ARM[1])
        self.odm_loc = nn.ModuleList(ODM[0])
        self.odm_conf = nn.ModuleList(ODM[1])
        #self.tcb = nn.ModuleList(TCB)
        self.tcb0 = nn.ModuleList(TCB[0])
        self.tcb1 = nn.ModuleList(TCB[1])
        self.tcb2 = nn.ModuleList(TCB[2])

    def forward(self, x):
        """Applies network layers and ops on input image(s) x.
        Args:
            x: input image or batch of images. Shape: [batch,3,w,h].
        Return:
            list of concat outputs from:
                1: confidence layers, Shape: [batch*num_priors,num_classes]
                2: localization layers, Shape: [batch,num_priors*4]
                3: priorbox layers, Shape: [2,num_priors*4]
        """
        sources = list()
        tcb_source = list()
        arm_loc = list()
        arm_conf = list()
        odm_loc = list()
        odm_conf = list()
        odm_conf_debug = list()
        # apply vgg up to conv4_3 relu and conv5_3 relu
        for k in range(30):
            x = self.vgg[k](x)
            if k == 22:
                s = self.conv4_3_L2Norm(x)
                sources.append(s)
            elif k == 29:
                s = self.conv5_3_L2Norm(x)
                sources.append(s)
        # apply vgg up to fc7
        for k in range(30, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)
        # apply extra layers and cache source layer outputs
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)
        # apply ARM and ODM to source layers
        for (x, l, c) in zip(sources, self.arm_loc, self.arm_conf):
            arm_loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            arm_conf.append(c(x).permute(0, 2, 3, 1).contiguous())
        arm_loc = torch.cat([tmp.view(tmp.size(0), -1) for tmp in arm_loc], 1)
        arm_conf = torch.cat([tmp.view(tmp.size(0), -1) for tmp in arm_conf], 1)
        #print([x.size() for x in sources])
        # calculate TCB features
        #print([x.size() for x in sources])
        p = None
        for k, v in enumerate(sources[::-1]):
            s = v
            for i in range(3):
                s = self.tcb0[(3-k)*3 + i](s)
                #print(s.size())
            if k != 0:
                u = p
                u = self.tcb1[3-k](u)
                s += u
            for i in range(3):
                s = self.tcb2[(3-k)*3 + i](s)
            p = s
            tcb_source.append(s)
        #print([x.size() for x in tcb_source])
        tcb_source.reverse()
        # apply ODM to source layers
        for (x, l, c) in zip(tcb_source, self.odm_loc, self.odm_conf):
            odm_loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            odm_conf.append(c(x).permute(0, 2, 3, 1).contiguous())
        odm_conf_debug = odm_conf
        odm_loc = torch.cat([tmp.view(tmp.size(0), -1) for tmp in odm_loc], 1)
        odm_conf = torch.cat([tmp.view(tmp.size(0), -1) for tmp in odm_conf], 1)
        #print(arm_loc.size(), arm_conf.size(), odm_loc.size(), odm_conf.size())
        output = (
                arm_loc.view(arm_loc.size(0), -1, 4),
                arm_conf.view(arm_conf.size(0), -1, 2),
                odm_loc.view(odm_loc.size(0), -1, 4),
                odm_conf.view(odm_conf.size(0), -1, self.num_classes),
                self.priors,
                odm_conf_debug
            )
        return output

    def load_weights(self, base_file):
        other, ext = os.path.splitext(base_file)
        #device = torch.device('cpu')
        if ext in ('.pkl', '.pth'):
            print('Loading weights into state dict...')
            self.load_state_dict(torch.load(base_file),strict=False)
            print('Finished!')
        else:
            print('Sorry only .pth and .pkl files supported.')
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    print(net)
    cudnn.benchmark = True  # cudnn autotuner picks the fastest kernels; revisit if input sizes vary
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    resize = 1
    num = 1

    # testing begin
    folder = '/home/siddhartha/Siddhartha/Reveal-Media/Reveal Videos/redaction/train/7'
    priorbox = PriorBox(cfg, image_size=(720, 1280))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    exception = []
    for filename in os.listdir(folder):
        img_raw = cv2.imread(os.path.join(folder, filename))
        if img_raw is None:
            continue  # skip unreadable files before touching img_raw.shape
        imgpth = os.path.join(folder, filename)
        A, B, _ = img_raw.shape
        print(A, B)
        #img_raw = cv2.resize(img_raw,(720,720))
        #image_path = "./curve/test.jpg"
        #img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)

        img = np.float32(img_raw)
Example #25
def main():
    args = params()
    logger = createlogger(args.log_dir)
    net, optimizer, criterion, train_loader, val_loader = train_net(args)
    start_epoch = args.start_epoch
    iteration = 0
    net.train()
    rgb_mean = np.array([123., 117., 104.])[np.newaxis,
                                            np.newaxis, :].astype('float32')
    loss_hist = collections.deque(maxlen=200)
    loss_class_min = 10.0
    loss_reg_min = 10.0
    prior_box = PriorBox()
    with torch.no_grad():
        priors = prior_box()
    for epoch in range(start_epoch, cfg.EPOCHES):
        #losses = 0
        lr = poly_lr_scheduler(optimizer, args.lr, epoch, max_iter=cfg.EPOCHES)
        for batch_idx, (images, targets) in enumerate(train_loader):
            if args.cuda:
                images = images.cuda()  #Variable(images.cuda())
                targets = [ann.cuda() for ann in targets]
            '''
            conf_t = test_anchor(targets,priors,cfg)
            images = images.cpu().numpy()
            for i in range(args.batch_size):
                tmp_img = np.transpose(images[i],(1,2,0))
                tmp_img = tmp_img + rgb_mean
                #tmp_img = tmp_img * 255
                tmp_img = np.array(tmp_img,dtype=np.uint8)
                tmp_img = cv2.cvtColor(tmp_img,cv2.COLOR_RGB2BGR)
                h,w = tmp_img.shape[:2]
                if len(targets[i])>0:
                    gt = targets[i].cpu().numpy()
                    for j in range(gt.shape[0]):
                        x1,y1 = int(gt[j,0]*w),int(gt[j,1]*h)
                        x2,y2 = int(gt[j,2]*w),int(gt[j,3]*h)
                        # print('pred',x1,y1,x2,y2,gt[j,4],w,h)
                        if x2 >x1 and y2 >y1:
                            cv2.rectangle(tmp_img,(x1,y1),(x2,y2),(0,0,255))
                for j in range(priors.size(0)):
                    if conf_t[i,j] >0:
                        box = priors[j].cpu().numpy()
                        # print(box)
                        x1,y1 = box[:2] - box[2:] / 2
                        x2,y2 = box[:2] + box[2:] / 2
                        x1,y1 = int(x1*w),int(y1*h)
                        x2,y2 = int(x2*w),int(y2*h)
                        cv2.rectangle(tmp_img,(x1,y1),(x2,y2),(255,0,0))
                cv2.imshow('src',tmp_img)
                cv2.waitKey(0)
            '''
            # if iteration in cfg.LR_STEPS:
            #     step_index += 1
            #     adjust_learning_rate(args.lr,optimizer, args.gamma, step_index)
            # t0 = time.time()
            out = net(images)
            # backprop
            optimizer.zero_grad()
            # loss_l, loss_c,loss_iou = criterion(out,priors, targets)
            loss_l, loss_c = criterion(out, priors, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            # t1 = time.time()
            loss_hist.append(float(loss.item()))
            if iteration % 100 == 0:
                #tloss = losses / 100.0
                #print('tl',loss.data,tloss)
                logger.info(
                    'epoch:{} || iter:{} || tloss:{:.4f}, confloss:{:.4f}, locloss:{:.4f} || lr:{:.6f}'
                    .format(epoch, iteration, np.mean(loss_hist),
                            loss_c.item(), loss_l.item(), lr))
                #losses = 0
            if iteration != 0 and iteration % 100 == 0:
                tmpl, tmpc = val(args, net, val_loader, criterion, priors,
                                 logger)
                if tmpl < loss_reg_min or tmpc < loss_class_min:
                    loss_reg_min = tmpl
                    loss_class_min = tmpc
                    logger.info('Saving state, iter: %d' % iteration)
                    sfile = 'sfd_' + args.dataset + '_best.pth'
                    spath = os.path.join(args.save_folder, sfile)
                    if args.multigpu:
                        torch.save(net.module.state_dict(), spath)
                    else:
                        torch.save(net.state_dict(), spath)
            iteration += 1
        #val(args,net,val_loader,criterion)
        if iteration == cfg.MAX_STEPS:
            break
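`poly_lr_scheduler` above decays the learning rate polynomially per epoch. A minimal sketch matching the call `poly_lr_scheduler(optimizer, args.lr, epoch, max_iter=cfg.EPOCHES)`, assuming the conventional power of 0.9; the repository's helper may differ:

def poly_lr_scheduler(optimizer, init_lr, epoch, max_iter, power=0.9):
    # polynomial decay: lr falls from init_lr to 0 over max_iter epochs
    lr = init_lr * (1 - epoch / max_iter) ** power
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr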