def __init__(self, phase, num_classes):
    """Build the BlazeFace detector.

    Args:
        phase: 'train' or 'test'; 'test' attaches softmax + Detect
            post-processing for decoded boxes.
        num_classes: number of output classes (background included).
    """
    super(BlazeFace, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    # Stem: 3x3 stride-2 conv halves the input resolution.
    self.conv_1 = nn.Conv2d(3, 24, kernel_size=3, stride=2, padding=1, bias=True)
    self.bn_1 = nn.BatchNorm2d(24)
    self.relu = nn.ReLU(inplace=True)
    # Single BlazeBlocks (24 -> 48 channels, one stride-2 downsample).
    self.blaze_1 = BlazeBlock(24, 24)
    self.blaze_2 = BlazeBlock(24, 24)
    self.blaze_3 = BlazeBlock(24, 48, stride=2)
    self.blaze_4 = BlazeBlock(48, 48)
    self.blaze_5 = BlazeBlock(48, 48)
    # Double BlazeBlocks (project to 24, expand to 96), two more downsamples.
    self.blaze_6 = BlazeBlock(48, 24, 96, stride=2)
    self.blaze_7 = BlazeBlock(96, 24, 96)
    self.blaze_8 = BlazeBlock(96, 24, 96)
    self.blaze_9 = BlazeBlock(96, 24, 96, stride=2)
    self.blaze_10 = BlazeBlock(96, 24, 96)
    self.blaze_11 = BlazeBlock(96, 24, 96)
    self.apply(initialize)
    # Multibox heads attached to the blaze_9 / blaze_10 feature maps
    # with 2 and 6 anchors respectively.
    self.head = mbox([self.blaze_9, self.blaze_10], [2, 6], 2)
    self.loc = nn.ModuleList(self.head[0])
    self.conf = nn.ModuleList(self.head[1])
    self.cfg = wider_face
    # print(self.cfg)
    self.priorbox = PriorBox(self.cfg)
    # Fix: `Variable(..., volatile=True)` is deprecated; `torch.no_grad()`
    # is the modern equivalent and matches the rest of this codebase.
    with torch.no_grad():
        self.priors = self.priorbox.forward()
    if phase == 'test':
        self.softmax = nn.Softmax(dim=-1)
        self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
def get_model(cls): """Get the model object for this instance, loading it if it's not already loaded.""" trained_model = '/opt/ml/model/m2det512_vgg.pth' #trained_model = '../../m2det512_vgg.pth' anchor_config = anchors(cfg) print_info('The Anchor info: \n{}'.format(anchor_config)) priorbox = PriorBox(anchor_config) net = build_net('test', size=cfg.model.input_size, config=cfg.model.m2det_config) init_net(net, cfg, trained_model) print_info('===> Finished constructing and loading model', ['yellow', 'bold']) net.eval() with torch.no_grad(): priors = priorbox.forward() if cfg.test_cfg.cuda: net = net.cuda() priors = priors.cuda() cudnn.benchmark = True else: net = net.cpu() _preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1)) detector = Detect(cfg.model.m2det_config.num_classes, cfg.loss.bkg_label, anchor_config) return net, priors, _preprocess, detector
def main():
    """Run detection on every image in ``args.images`` and write the
    annotated results into ``args.save_folder``."""
    global args
    args = arg_parse()
    bgr_means = (104, 117, 123)
    dataset_name = args.dataset
    size = args.size
    top_k = args.top_k
    thresh = args.confidence_threshold
    # "refine_*" model versions use the ARM (anchor refinement) branch.
    use_refine = args.version.split("_")[0] == "refine"
    if dataset_name[0] == "V":
        cfg = cfg_dict["VOC"][args.version][str(size)]
        trainvalDataset = VOCDetection
        dataroot = VOCroot
        targetTransform = AnnotationTransform()
        valSet = datasets_dict["VOC2007"]
        classes = VOC_CLASSES
    else:
        cfg = cfg_dict["COCO"][args.version][str(size)]
        trainvalDataset = COCODetection
        dataroot = COCOroot
        targetTransform = None
        valSet = datasets_dict["COCOval"]
        classes = COCO_CLASSES
    num_classes = cfg['num_classes']
    save_folder = args.save_folder
    # makedirs(exist_ok=True) also creates missing parents and is race-free.
    os.makedirs(save_folder, exist_ok=True)
    if args.cuda and torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')
    net = model_builder(args.version, cfg, "test", int(size), num_classes, args.channel_size)
    state_dict = torch.load(args.weights)
    # Strip a possible DataParallel 'module.' prefix from checkpoint keys.
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        head = k[:7]
        if head == 'module.':
            name = k[7:]  # remove `module.`
        else:
            name = k
        new_state_dict[name] = v
    net.load_state_dict(new_state_dict)
    detector = Detect(num_classes, 0, cfg, use_arm=use_refine)
    img_wh = cfg["img_wh"]
    ValTransform = BaseTransform(img_wh, bgr_means, (2, 0, 1))
    input_folder = args.images
    for item in os.listdir(input_folder):
        img_path = os.path.join(input_folder, item)
        img = cv2.imread(img_path)
        dets = im_detect(img, net, detector, cfg, ValTransform, thresh)
        draw_img = draw_rects(img, dets, classes)
        out_img_name = "output_" + item
        save_path = os.path.join(save_folder, out_img_name)
        # Bug fix: save the annotated image, not the raw input.
        cv2.imwrite(save_path, draw_img)
def main():
    """Evaluate an SSD checkpoint over the configured validation set."""
    global args
    args = arg_parse()
    cfg_from_file(args.cfg_file)
    bgr_means = cfg.TRAIN.BGR_MEAN
    dataset_name = cfg.DATASETS.DATA_TYPE
    batch_size = cfg.TEST.BATCH_SIZE
    num_workers = args.num_workers
    # Max detections kept per image: 200 is the VOC convention, 300 COCO's.
    if cfg.DATASETS.DATA_TYPE == 'VOC':
        trainvalDataset = VOCDetection
        top_k = 200
    else:
        trainvalDataset = COCODetection
        top_k = 300
    dataroot = cfg.DATASETS.DATAROOT
    if cfg.MODEL.SIZE == '300':
        size_cfg = cfg.SMALL
    else:
        size_cfg = cfg.BIG
    valSet = cfg.DATASETS.VAL_TYPE
    num_classes = cfg.MODEL.NUM_CLASSES
    save_folder = args.save_folder
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    cfg.TRAIN.TRAIN_ON = False
    net = SSD(cfg)
    checkpoint = torch.load(args.weights)
    state_dict = checkpoint['model']
    # Strip a possible DataParallel 'module.' prefix from checkpoint keys.
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        head = k[:7]
        if head == 'module.':
            name = k[7:]  # remove `module.`
        else:
            name = k
        new_state_dict[name] = v
    net.load_state_dict(new_state_dict)
    detector = Detect(cfg)
    ValTransform = BaseTransform(size_cfg.IMG_WH, bgr_means, (2, 0, 1))
    val_dataset = trainvalDataset(dataroot, valSet, ValTransform, "val")
    val_loader = data.DataLoader(val_dataset,
                                 batch_size,
                                 shuffle=False,
                                 num_workers=num_workers,
                                 collate_fn=detection_collate)
    # Bug fix: a stray `top_k = 300` here used to clobber the dataset-specific
    # value chosen above, making the VOC setting dead code.
    thresh = cfg.TEST.CONFIDENCE_THRESH
    eval_net(val_dataset, val_loader, net, detector, cfg, ValTransform,
             top_k, thresh=thresh, batch_size=batch_size)
class Pelee_Det(object):
    """Pelee detector wrapper: loads the network and priors once in
    ``__init__`` and exposes a per-image ``detect`` method."""

    def __init__(self):
        self.anchor_config = anchors(cfg.model)
        self.priorbox = PriorBox(self.anchor_config)
        self.net = build_net('test', cfg.model.input_size, cfg.model)
        init_net(self.net, cfg, args.trained_model)
        self.net.eval()
        self.num_classes = cfg.model.num_classes
        with torch.no_grad():
            self.priors = self.priorbox.forward()
        self.net = self.net.cuda()
        self.priors = self.priors.cuda()
        cudnn.benchmark = True
        self._preprocess = BaseTransform(cfg.model.input_size,
                                         cfg.model.rgb_means, (2, 0, 1))
        # Bug fix: was `Detect(num_classes, ...)` with an undefined bare
        # name; the value lives in `self.num_classes`.
        self.detector = Detect(self.num_classes, cfg.loss.bkg_label,
                               self.anchor_config)

    def detect(self, image):
        """Detect objects in a BGR image.

        Returns:
            (infos, im2show): textual info and the annotated image as
            produced by ``draw_detection``.
        """
        loop_start = time.time()
        w, h = image.shape[1], image.shape[0]
        img = self._preprocess(image).unsqueeze(0)
        if cfg.test_cfg.cuda:
            img = img.cuda()
        # Scale factors to map normalized box coords back to pixels.
        scale = torch.Tensor([w, h, w, h])
        out = self.net(img)
        boxes, scores = self.detector.forward(out, self.priors)
        boxes = (boxes[0] * scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []
        count = 0
        # Per-class thresholding + NMS (class 0 is background).
        # for j in [2, 6, 7, 14, 15]:
        for j in range(1, len(ch_labels)):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32, copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([_.tolist() + [j] for _ in c_dets])
        loop_time = time.time() - loop_start
        # Robustness fix: an empty detection list used to crash on the
        # column slices below; fall back to an empty (0, 6) array.
        allboxes = np.array(allboxes) if allboxes else np.zeros((0, 6), dtype=np.float32)
        boxes = allboxes[:, :4]
        scores = allboxes[:, 4]
        cls_inds = allboxes[:, 5]
        infos, im2show = draw_detection(image, boxes, scores, cls_inds, -1,
                                        args.thresh)
        return infos, im2show
def __init__(self, img_size=300, thresh=0.56):
    """Build the detector: net, Detect decoder, transform and priors.

    Args:
        img_size: net input size; must be 300 or 512 (selects the
            VOC_300 / VOC_512 config respectively).
        thresh: score threshold used later when filtering detections.
    """
    assert img_size == 300 or img_size == 512, 'net input image size must be 300 or 512'
    self.labels_name = LABELS_SET
    self.labels_numb = len(LABELS_SET)
    self.img_size = img_size
    self.cfg = VOC_300 if img_size == 300 else VOC_512
    self.thresh = thresh
    self.gpu_is_available = torch.cuda.is_available()
    self.gpu_numb = torch.cuda.device_count()
    self.net = build_net('test', self.img_size, self.labels_numb)
    self.detect = Detect(self.labels_numb, 0, self.cfg)
    self.transform = BaseTransform(self.img_size)
    # load net weights (strip a possible DataParallel 'module.' prefix)
    state_dict = torch.load(trained_model, map_location='cpu')
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        head = k[:7]
        if head == 'module.':
            name = k[7:]  # remove `module.`
        else:
            name = k
        new_state_dict[name] = v
    self.net.load_state_dict(new_state_dict)
    self.net.eval()
    print('Finished loading model!')
    # Spread inference over all visible GPUs when more than one is present.
    if self.gpu_numb > 1:
        self.net = torch.nn.DataParallel(self.net,
                                         device_ids=list(
                                             range(self.gpu_numb)))
    # set net gpu or cpu model
    if self.gpu_is_available:
        self.net.cuda()
        cudnn.benchmark = True
    # define box generator; priors are fixed, so build them once here.
    priorbox = PriorBox(self.cfg)
    with torch.no_grad():
        self.priors = priorbox.forward()
    if self.gpu_is_available:
        self.priors = self.priors.cuda()
def __init__(self):
    """Build the detector (CPU variant): network, priors, transform and
    the Detect decoder, all loaded once."""
    self.anchor_config = anchors(cfg.model)
    self.priorbox = PriorBox(self.anchor_config)
    self.net = build_net('test', cfg.model.input_size, cfg.model)
    init_net(self.net, cfg, args.trained_model)
    self.net.eval()
    self.num_classes = cfg.model.num_classes
    with torch.no_grad():
        self.priors = self.priorbox.forward()
    # self.net = self.net.cuda()
    # self.priors = self.priors.cuda()
    cudnn.benchmark = True
    self._preprocess = BaseTransform(cfg.model.input_size,
                                     cfg.model.rgb_means, (2, 0, 1))
    # Bug fix: was `Detect(num_classes, ...)` with an undefined bare name;
    # the value is stored in `self.num_classes` above.
    self.detector = Detect(self.num_classes, cfg.loss.bkg_label,
                           self.anchor_config)
def test_model(trained_model):
    """Load a checkpoint and evaluate it on the configured test set."""
    # load net
    img_dim = (300, 512)[args.size == '512']
    num_classes = (21, 81)[args.dataset == 'COCO']
    net = build_net('test', img_dim, num_classes)  # initialize detector
    state_dict = torch.load(trained_model)
    # create new OrderedDict that does not contain `module.`
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        head = k[:7]
        if head == 'module.':
            name = k[7:]  # remove `module.`
        else:
            name = k
        new_state_dict[name] = v
    net.load_state_dict(new_state_dict)
    net.eval()
    print('Finished loading model!')
    # print(net)
    # load data
    if args.dataset == 'VOC':
        testset = VOCDetection(VOCroot, [('2007', 'test')], None,
                               AnnotationTransform())
    elif args.dataset == 'VOC2012':
        testset = VOCDetection(VOCroot, [('2012', 'test')], None,
                               AnnotationTransform())
    elif args.dataset == 'COCO':
        testset = COCODetection(COCOroot, [('2014', 'minival')], None)
        # COCOroot, [('2015', 'test-dev')], None)
    else:
        print('Only VOC and COCO dataset are supported now!')
        # Bug fix: previously fell through with `testset` unbound and
        # crashed with NameError below; bail out instead.
        return
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    else:
        net = net.cpu()
    # evaluation
    # top_k = (300, 200)[args.dataset == 'COCO']
    top_k = 200
    detector = Detect(num_classes, 0, cfg)
    save_folder = os.path.join(args.save_folder, args.dataset)
    rgb_means = ((104, 117, 123),
                 (103.94, 116.78, 123.68))[args.version == 'RFB_mobile']
    test_net(save_folder, net, detector, args.cuda, testset,
             BaseTransform(net.size, rgb_means, (2, 0, 1)),
             top_k, thresh=0.01)
def main():
    """Detect objects on images from ``args.images`` and save annotated
    copies into ``args.save_folder``."""
    global args
    args = arg_parse()
    ssh_run_param(args)
    cfg_from_file(args.cfg_file)
    bgr_means = cfg.TRAIN.BGR_MEAN
    dataset_name = cfg.DATASETS.DATA_TYPE
    batch_size = cfg.TEST.BATCH_SIZE
    num_workers = args.num_workers
    if cfg.DATASETS.DATA_TYPE == 'VOC':
        trainvalDataset = VOCDetection
        classes = VOC_CLASSES
        top_k = 200
    else:
        trainvalDataset = COCODetection
        classes = COCO_CLASSES
        top_k = 300
    valSet = cfg.DATASETS.VAL_TYPE
    num_classes = cfg.MODEL.NUM_CLASSES
    save_folder = args.save_folder
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    cfg.TRAIN.TRAIN_ON = False
    net = SSD(cfg)
    checkpoint = torch.load(args.weights)
    state_dict = checkpoint['model']
    # Strip a possible DataParallel 'module.' prefix from checkpoint keys.
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        head = k[:7]
        if head == 'module.':
            name = k[7:]  # remove `module.`
        else:
            name = k
        new_state_dict[name] = v
    net.load_state_dict(new_state_dict)
    detector = Detect(cfg)
    img_wh = cfg.TEST.INPUT_WH
    ValTransform = BaseTransform(img_wh, bgr_means, (2, 0, 1))
    input_folder = args.images
    thresh = cfg.TEST.CONFIDENCE_THRESH
    # NOTE(review): the [2:3] slice processes a single image — looks like a
    # debugging leftover; kept to preserve current behaviour.
    for item in os.listdir(input_folder)[2:3]:
        img_path = os.path.join(input_folder, item)
        print(img_path)
        img = cv2.imread(img_path)
        dets = im_detect(img, net, detector, ValTransform, thresh)
        draw_img = draw_rects(img, dets, classes)
        out_img_name = "output_" + item
        save_path = os.path.join(save_folder, out_img_name)
        # Bug fix: save the annotated image, not the raw input.
        cv2.imwrite(save_path, draw_img)
def _init_model(self):
    """Select the config from the checkpoint name, build the net, load
    weights and create priors + the Detect decoder."""
    # Bug fix: `cuda` was only bound inside an `if torch.cuda.is_available()`
    # branch, so CPU-only hosts crashed with NameError further down.
    cuda = torch.cuda.is_available()
    # Infer the input resolution from the checkpoint file name.
    if '300' in self.model_path:
        cfg = COCO_300
        self.img_dim = 300
        print('Model input size is 300')
    else:
        cfg = COCO_512
        self.img_dim = 512
        print('Model input size is 512')
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
    # Bug fix: self.priors was previously set only in the CUDA branch.
    self.priors = priors.cuda() if cuda else priors
    self.net = build_rfb_vgg_net('test', self.img_dim, self.num_classes)
    # initialize detector
    state_dict = torch.load(self.model_path)['state_dict']
    # create new OrderedDict that does not contain `module.`
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        head = k[:7]
        if head == 'module.':
            name = k[7:]  # remove `module.`
        else:
            name = k
        new_state_dict[name] = v
    self.net.load_state_dict(new_state_dict)
    self.net.eval()
    if cuda:
        self.net = self.net.cuda()
        cudnn.benchmark = True
    else:
        self.net = self.net.cpu()
    print('Finished loading model!')
    # print(net)
    self.detector = Detect(self.num_classes, 0, cfg)
def __init__(self, args):
    """Build the VGG stride-16 single-scale detector.

    Args:
        args: namespace with `phase`, `num_classes` and `crop_size`.
    """
    super(VggStride16, self).__init__()
    self.phase = args.phase
    self.num_classes = args.num_classes
    # Fix: `Variable(..., volatile=True)` is deprecated; torch.no_grad()
    # is the modern equivalent.
    with torch.no_grad():
        self.priors = PriorBox(vggstride16_config).forward()
    self.crop_size = args.crop_size
    self.vgg = nn.ModuleList(vgg(base[self.crop_size], 3,))
    # Anchors per location: len(list * 2) == 2 * len(list), so this equals
    # scales * (2 * aspect_ratios + 1).
    self.n_anchor = len(vggstride16_config['scales']) * (len(vggstride16_config['aspect_ratios'][0] * 2) + 1)
    self.loc_layers = nn.Conv2d(self.vgg[-2].out_channels, self.n_anchor * 4,
                                kernel_size=3, padding=1)
    self.cls_layers = nn.Conv2d(self.vgg[-2].out_channels,
                                self.n_anchor * self.num_classes,
                                kernel_size=3, padding=1)
    # Fix: explicit dim silences the nn.Softmax deprecation warning and
    # matches the other detection heads in this project.
    self.softmax = nn.Softmax(dim=-1)
    if self.phase == 'test':
        self.detect = Detect(self.num_classes, 0, 200, 0.01, 0.45)  # conf 0.01
def __init__(self, phase, size, base, extras, head, num_classes):
    """Compose the SSD architecture from prebuilt layer lists.

    Args:
        phase: 'train' or 'test' ('test' adds softmax + Detect decoding).
        size: input image size.
        base: VGG backbone layers.
        extras: extra feature layers on top of the backbone.
        head: (loc_layers, conf_layers) multibox heads.
        num_classes: number of classes; 21 selects the VOC config.
    """
    super(SSD, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    # 21 classes means Pascal VOC; anything else is treated as COCO.
    self.cfg = voc if num_classes == 21 else coco
    self.size = size

    # Backbone and extra feature layers.
    self.vgg = nn.ModuleList(base)
    # Learnable rescaling of the L2-normalised conv4_3 feature map.
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)

    # Multibox prediction heads.
    loc_layers, conf_layers = head
    self.loc = nn.ModuleList(loc_layers)
    self.conf = nn.ModuleList(conf_layers)
    self.relu = nn.ReLU(inplace=True)

    if phase == 'test':
        self.softmax = nn.Softmax(dim=-1)
        self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
def get_voc_reader(args):
    """Build the VOC training loader plus a (testset, detector) pair.

    Returns:
        (trainloader, (testset, detector))
    """
    input_dim = args.size
    mean_bgr = (104, 117, 123)
    std_bgr = (1, 1, 1)
    # The mobile variant uses a milder augmentation probability.
    aug_prob = 0.2 if args.version == 'RFB_mobile' else 0.6
    split_list = [('2007', 'trainval'), ('2012', 'trainval')]
    net_cfg = VOC_512 if args.size == '512' else VOC_300
    testset = VOCDetection(VOCroot, [('2007', 'test')], None,
                           AnnotationTransform())
    train_dataset = VOCDetection(VOCroot, split_list,
                                 preproc(input_dim, mean_bgr, std_bgr, aug_prob),
                                 AnnotationTransform())
    trainloader = torch.utils.data.DataLoader(train_dataset,
                                              args.batch_size,
                                              shuffle=True,
                                              num_workers=args.num_workers,
                                              collate_fn=detection_collate)
    n_cls = len(args.classes.split(","))
    detector = Detect(n_cls, 0, net_cfg)
    return (trainloader, (testset, detector))
print_info('Detect time per image: {:.3f}s'.format(tot_detect_time / (num_images-1))) print_info('Nms time per image: {:.3f}s'.format(tot_nms_time / (num_images - 1))) print_info('Total time per image: {:.3f}s'.format((tot_detect_time + tot_nms_time) / (num_images - 1))) print_info('FPS: {:.3f} fps'.format((num_images - 1) / (tot_detect_time + tot_nms_time))) if __name__ == '__main__': net = build_net('test', size = cfg.model.input_size, config = cfg.model.m2det_config) init_net(net, cfg, args.trained_model) print_info('===> Finished constructing and loading model',['yellow','bold']) net.eval() _set = 'eval_sets' if not args.test else 'test_sets' testset = get_dataloader(cfg, args.dataset, _set) if cfg.test_cfg.cuda: net = net.cuda() cudnn.benchmark = True else: net = net.cpu() detector = Detect(cfg.model.m2det_config.num_classes, cfg.loss.bkg_label, anchor_config) save_folder = os.path.join(cfg.test_cfg.save_folder, args.dataset) _preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1)) test_net(save_folder, net, detector, cfg.test_cfg.cuda, testset, transform = _preprocess, max_per_image = cfg.test_cfg.topk, thresh = cfg.test_cfg.score_threshold)
size=cfg.model.input_size, config=cfg.model.m2det_config) init_net(net, cfg, args.trained_model) print_info('===> Finished constructing and loading model', ['yellow', 'bold']) net.eval() with torch.no_grad(): priors = priorbox.forward() if cfg.test_cfg.cuda: net = net.cuda() priors = priors.cuda() cudnn.benchmark = True else: net = net.cpu() _preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1)) detector = Detect(cfg.model.m2det_config.num_classes, cfg.loss.bkg_label, anchor_config) def _to_color(indx, base): """ return (b, r, g) tuple""" base2 = base * base b = 2 - indx / base2 r = 2 - (indx % base2) / base g = 2 - (indx % base2) % base return b * 127, r * 127, g * 127 base = int(np.ceil(pow(cfg.model.m2det_config.num_classes, 1. / 3))) colors = [ _to_color(x, base) for x in range(cfg.model.m2det_config.num_classes) ]
def main():
    """Train SSD/RefineDet: build the net, optimizer and loss criteria,
    then run training epochs with periodic checkpointing and evaluation."""
    global args
    args = arg_parse()
    cfg_from_file(args.cfg_file)
    save_folder = args.save_folder
    batch_size = cfg.TRAIN.BATCH_SIZE
    bgr_means = cfg.TRAIN.BGR_MEAN
    p = 0.6  # augmentation probability passed to preproc
    gamma = cfg.SOLVER.GAMMA
    momentum = cfg.SOLVER.MOMENTUM
    weight_decay = cfg.SOLVER.WEIGHT_DECAY
    size = cfg.MODEL.SIZE
    thresh = cfg.TEST.CONFIDENCE_THRESH
    # NOTE(review): both branches set top_k = 1000, so the split is
    # currently redundant — presumably kept for per-dataset tuning.
    if cfg.DATASETS.DATA_TYPE == 'VOC':
        trainvalDataset = VOCDetection
        top_k = 1000
    else:
        trainvalDataset = COCODetection
        top_k = 1000
    dataset_name = cfg.DATASETS.DATA_TYPE
    dataroot = cfg.DATASETS.DATAROOT
    trainSet = cfg.DATASETS.TRAIN_TYPE
    valSet = cfg.DATASETS.VAL_TYPE
    num_classes = cfg.MODEL.NUM_CLASSES
    start_epoch = args.resume_epoch
    epoch_step = cfg.SOLVER.EPOCH_STEPS
    end_epoch = cfg.SOLVER.END_EPOCH
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    net = SSD(cfg)
    print(net)
    if cfg.MODEL.SIZE == '300':
        size_cfg = cfg.SMALL
    else:
        size_cfg = cfg.BIG
    optimizer = optim.SGD(
        net.parameters(),
        lr=cfg.SOLVER.BASE_LR,
        momentum=momentum,
        weight_decay=weight_decay)
    # Optionally resume model + optimizer state from a checkpoint.
    if args.resume_net != None:
        checkpoint = torch.load(args.resume_net)
        state_dict = checkpoint['model']
        # Strip a possible DataParallel 'module.' prefix from the keys.
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        net.load_state_dict(new_state_dict)
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('Loading resume network...')
    if args.ngpu > 1:
        net = torch.nn.DataParallel(net)
    net.cuda()
    cudnn.benchmark = True
    # RefineDet uses two losses (ARM: binary objectness, ODM: all classes);
    # plain SSD uses a single multibox loss.
    criterion = list()
    if cfg.MODEL.REFINE:
        detector = Detect(cfg)
        arm_criterion = RefineMultiBoxLoss(cfg, 2)
        odm_criterion = RefineMultiBoxLoss(cfg, cfg.MODEL.NUM_CLASSES)
        criterion.append(arm_criterion)
        criterion.append(odm_criterion)
    else:
        detector = Detect(cfg)
        ssd_criterion = MultiBoxLoss(cfg)
        criterion.append(ssd_criterion)
    TrainTransform = preproc(size_cfg.IMG_WH, bgr_means, p)
    ValTransform = BaseTransform(size_cfg.IMG_WH, bgr_means, (2, 0, 1))
    val_dataset = trainvalDataset(dataroot, valSet, ValTransform, dataset_name)
    val_loader = data.DataLoader(
        val_dataset,
        batch_size,
        shuffle=False,
        num_workers=args.num_workers,
        collate_fn=detection_collate)
    for epoch in range(start_epoch + 1, end_epoch + 1):
        # The training dataset/loader is rebuilt every epoch.
        train_dataset = trainvalDataset(dataroot, trainSet, TrainTransform,
                                        dataset_name)
        epoch_size = len(train_dataset)
        train_loader = data.DataLoader(
            train_dataset,
            batch_size,
            shuffle=True,
            num_workers=args.num_workers,
            collate_fn=detection_collate)
        train(train_loader, net, criterion, optimizer, epoch, epoch_step,
              gamma, end_epoch, cfg)
        # Checkpoint every 5 epochs, and every 2 epochs from epoch 60 on.
        if (epoch % 5 == 0) or (epoch % 2 == 0 and epoch >= 60):
            save_checkpoint(net, epoch, size, optimizer)
        # Evaluate every second epoch once past warm-up.
        if (epoch >= 2 and epoch % 2 == 0):
            eval_net(
                val_dataset,
                val_loader,
                net,
                detector,
                cfg,
                ValTransform,
                top_k,
                thresh=thresh,
                batch_size=batch_size)
    save_checkpoint(net, end_epoch, size, optimizer)
def demo(v_f):
    """Run M2Det detection on the video file `v_f`, showing each frame and
    writing an annotated copy to result.mp4."""
    cfg = Config.fromfile(config_f)
    anchor_config = anchors(cfg)
    priorbox = PriorBox(anchor_config)
    net = build_net('test',
                    size=cfg.model.input_size,
                    config=cfg.model.m2det_config)
    init_net(net, cfg, checkpoint_path)
    net.eval().to(device)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    _preprocess = BaseTransform(
        cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1))
    detector = Detect(cfg.model.m2det_config.num_classes,
                      cfg.loss.bkg_label, anchor_config)
    logging.info('detector initiated.')
    cap = cv2.VideoCapture(v_f)
    logging.info('detect on: {}'.format(v_f))
    logging.info('video width: {}, height: {}'.format(int(cap.get(3)),
                                                      int(cap.get(4))))
    out_video = cv2.VideoWriter("result.mp4",
                                cv2.VideoWriter_fourcc(*'MJPG'), 24,
                                (int(cap.get(3)), int(cap.get(4))))
    while True:
        ret, image = cap.read()
        if not ret:
            # End of stream: release everything and stop.
            out_video.release()
            cv2.destroyAllWindows()
            cap.release()
            break
        w, h = image.shape[1], image.shape[0]
        img = _preprocess(image).unsqueeze(0).to(device)
        # Maps normalised box coordinates back to pixel space.
        scale = torch.Tensor([w, h, w, h])
        out = net(img)
        boxes, scores = detector.forward(out, priors)
        boxes = (boxes[0]*scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []
        # Per-class score filtering + NMS (class 0 is background).
        for j in range(1, cfg.model.m2det_config.num_classes):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
                np.float32, copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            # min_thresh, device_id=0 if cfg.test_cfg.cuda else None)
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([_.tolist()+[j] for _ in c_dets])
        # NOTE(review): the display/write calls are assumed to sit inside
        # this guard — if they were outside it, `res` would be unbound on
        # frames with no detections; confirm against the original layout.
        if len(allboxes) > 0:
            allboxes = np.array(allboxes)
            # [boxes, scores, label_id] -> [id, score, boxes] 0, 1, 2, 3, 4, 5
            allboxes = allboxes[:, [5, 4, 0, 1, 2, 3]]
            logging.info('allboxes shape: {}'.format(allboxes.shape))
            res = visualize_det_cv2(image, allboxes, classes=classes,
                                    thresh=0.2)
            # res = visualize_det_cv2_fancy(image, allboxes, classes=classes, thresh=0.2, r=4, d=6)
            cv2.imshow('rr', res)
            out_video.write(res)
            cv2.waitKey(1)
use_cuda = torch.cuda.is_available() detect_model = build_net('test', ops.img_dim, ops.num_classes) # initialize detector #--------------------------------------------- device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # chkpt = torch.load(ops.detect_model, map_location=device) chkpt = torch.load(ops.detect_model, map_location=lambda storage, loc: storage) detect_model.load_state_dict(chkpt) detect_model.eval() # 设置为前向推断模式 acc_model(ops, detect_model) detect_model = detect_model.to(device) detector = Detect(ops.num_classes, 0, cfg) # num_classes, bkg_label, cfg priorbox = PriorBox(cfg, debug_=False) with torch.no_grad(): priors = priorbox.forward() if use_cuda: priors = priors.cuda() video_capture = cv2.VideoCapture(ops.test_path) ret, img_raw = video_capture.read() if ret: # scale = torch.Tensor([img_raw.shape[1], img_raw.shape[0],img_raw.shape[1], img_raw.shape[0]]) scale = [ img_raw.shape[1], img_raw.shape[0], img_raw.shape[1], img_raw.shape[0] ] # if use_cuda:
from model.refinedet_vgg import build_net net = build_net('test', size=ssd_dim, num_classes=num_classes, use_refine=args.refine, c7_channel=args.c7_channel) else: net = None print('loading model!', args.model_dir, args.iteration) net.load_state_dict(torch.load(trained_model)) print(net) net.eval() print('Finished loading model!', args.model_dir, args.iteration, 'tub=' + str(args.tub), 'tub_thresh=' + str(args.tub_thresh), 'tub_score=' + str(args.tub_generate_score)) detector = Detect(num_classes, 0, args.top_k, args.confidence_threshold, args.nms_threshold) priorbox = PriorBox(cfg) # priorbox=PriorBox(multi_cfg['2.2']) with torch.no_grad(): priors = priorbox.forward().to(device) # load data net = net.to(device) # evaluation test_net(args.save_folder, net, dataset, BaseTransform(net.size, dataset_mean), args.top_k, detector, priors) else: out_dir = get_output_dir( pkl_dir, args.iteration + '_' + args.dataset_name + '_' + args.set_file_name) print('Without detection', out_dir)
class ObjDetector(object):
    """SSD-style object detector with large-image chipping.

    Images larger than 1500px on their short side are split into five
    overlapping chips, detected independently, and the per-chip boxes are
    merged (offset back to full-image coordinates) before per-class NMS.
    """

    def __init__(self, img_size=300, thresh=0.56):
        """Build the net, Detect decoder, transform and priors.

        Args:
            img_size: net input size; must be 300 or 512.
            thresh: per-class score threshold used in __call__.
        """
        assert img_size == 300 or img_size == 512, 'net input image size must be 300 or 512'
        self.labels_name = LABELS_SET
        self.labels_numb = len(LABELS_SET)
        self.img_size = img_size
        self.cfg = VOC_300 if img_size == 300 else VOC_512
        self.thresh = thresh
        self.gpu_is_available = torch.cuda.is_available()
        self.gpu_numb = torch.cuda.device_count()
        self.net = build_net('test', self.img_size, self.labels_numb)
        self.detect = Detect(self.labels_numb, 0, self.cfg)
        self.transform = BaseTransform(self.img_size)
        # load net weights (strip a possible DataParallel 'module.' prefix)
        state_dict = torch.load(trained_model, map_location='cpu')
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        self.net.load_state_dict(new_state_dict)
        self.net.eval()
        print('Finished loading model!')
        if self.gpu_numb > 1:
            self.net = torch.nn.DataParallel(self.net,
                                             device_ids=list(
                                                 range(self.gpu_numb)))
        # set net gpu or cpu model
        if self.gpu_is_available:
            self.net.cuda()
            cudnn.benchmark = True
        # define box generator; priors are fixed, so compute them once.
        priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = priorbox.forward()
            if self.gpu_is_available:
                self.priors = self.priors.cuda()

    def __net__(self, img):
        """Run one forward pass and return (boxes, scores) as numpy arrays,
        with boxes scaled back to the input image's pixel coordinates."""
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        with torch.no_grad():
            x = self.transform(img).unsqueeze(0)
            if self.gpu_is_available:
                x = x.cuda()
                scale = scale.cuda()
            # get net output
            out = self.net(x)
            boxes, scores = self.detect.forward(out, self.priors)
            boxes = boxes[0]
            scores = scores[0]
            # scale each detection back up to the image
            boxes *= scale
            boxes = boxes.cpu().numpy()
            scores = scores.cpu().numpy()
        return boxes, scores

    def __call__(self, image):
        """
        :param image: rgb image
        :return: {'label_name':[x1,y1,x2,y2,score],...}
        """
        boxes = np.empty((0, 4))
        scores = np.empty((0, self.labels_numb))
        # Detect each chip and translate its boxes by the chip's offset.
        for img, p in self.__chips__(image):
            b = [p[0], p[1], p[0], p[1]]
            boxes_t, scores_t = self.__net__(img)
            boxes_t += list(map(float, b))
            boxes = np.vstack((boxes, boxes_t))
            scores = np.vstack((scores, scores_t))
        # filter bounding boxes: per-class score threshold then NMS
        results = dict()
        for j in range(1, self.labels_numb):
            inds = np.where(scores[:, j] > self.thresh)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes,
                 c_scores[:, np.newaxis])).astype(np.float32, copy=False)
            keeped = nms(c_dets, 0.45, force_cpu=0)
            c_dets = c_dets[keeped, :]
            results[self.labels_name[j]] = c_dets
        return results

    def __chips__(self, image):
        """Yield (sub_image, (x1, y1, x2, y2)) chips. Large images are cut
        into four quadrants plus one centred chip; small images are yielded
        whole."""
        h, w, _ = image.shape
        x = w // 2
        y = h // 2
        boxes = []
        if min(h, w) > 1500:
            boxes.append((0, 0, x, y))
            boxes.append((x, 0, w, y))
            boxes.append((0, y, x, h))
            boxes.append((x, y, w, h))
            # Centre chip covers the seams between the four quadrants.
            boxes.append((x // 2, y // 2, x + x // 2, y + y // 2))
        else:
            boxes.append((0, 0, w, h))
        for p in boxes:
            yield image[p[1]:p[3], p[0]:p[2]], p

    def draw(self, image, results):
        """Draw the boxes from __call__'s result dict onto `image` (in
        place) and return it."""
        # draw bounding boxes
        for label, boxes in results.items():
            for value in boxes:
                x1 = int(value[0])
                y1 = int(value[1])
                x2 = int(value[2])
                y2 = int(value[3])
                # label name and scores
                text = label + ',' + "%.2f" % value[4]
                # select color (cycled per label index)
                indx = self.labels_name.index(label) % len(COLORS)
                color = COLORS[indx]
                # draw bounding boxe
                cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
                # draw label
                font = cv2.FONT_HERSHEY_SIMPLEX
                font_scale = 0.58
                size = cv2.getTextSize(text, font, font_scale, 1)
                # text_w = size[0][0]
                text_h = size[0][1]
                # Keep the caption inside the frame at the top edge.
                cv2.putText(image, text, (x1, max((y1 - text_h), 0)), font,
                            font_scale, color, 1)
        return image
print_info('===> Finished constructing and loading model', ['yellow', 'bold']) net.eval() num_classes = cfg.model.num_classes with torch.no_grad(): priors = priorbox.forward() if cfg.test_cfg.cuda: net = net.cuda() priors = priors.cuda() cudnn.benchmark = True else: net = net.cpu() _preprocess = BaseTransform( cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1)) detector = Detect(num_classes, cfg.loss.bkg_label, anchor_config) base = int(np.ceil(pow(num_classes, 1. / 3))) cats = [_.strip().split(',')[-1] for _ in open('data/coco_labels.txt', 'r').readlines()] label_config = {'VOC': VOC_CLASSES, 'COCO': tuple(['__background__'] + cats)} labels = label_config[args.dataset] def draw_detection(im, bboxes, scores, cls_inds, fps, thr=0.2): imgcv = np.copy(im) h, w, _ = imgcv.shape for i, box in enumerate(bboxes): if scores[i] < thr: continue cls_indx = int(cls_inds[i])
if head == 'module.': name = k[7:] # remove `module.` else: name = k new_state_dict[name] = v net.load_state_dict(new_state_dict) net.eval() if args.cuda: net = net.cuda() # cudnn.benchmark = True else: net = net.cpu() print('Finished loading model!') # print(net) start = time.time() detector = Detect(numclass, 0, cfg) out = net(x) # forward pass boxes, scores = detector.forward(out, priors) boxes = boxes[0] scores = scores[0] boxes *= scale boxes = boxes.cpu().numpy() scores = scores.cpu().numpy() # scale each detection back up to the image bboxes = [] for j in range(1, numclass): inds = np.where(scores[:, j] > 0.1)[0] #conf > 0.6 if inds is None: continue c_bboxes = boxes[inds]
# Move the network to GPU before building the optimizer so the optimizer
# state is created on the same device as the parameters.
if args.cuda:
    net.cuda()
    cudnn.benchmark = True
optimizer = optim.SGD(net.parameters(), lr=args.lr,
                      momentum=args.momentum, weight_decay=args.weight_decay)
#optimizer = optim.RMSprop(net.parameters(), lr=args.lr,alpha = 0.9, eps=1e-08,
#                      momentum=args.momentum, weight_decay=args.weight_decay)
# RefineDet losses: the ARM branch is binary (2 = object vs background),
# the ODM branch covers all classes; the trailing 0.01 presumably is an
# objectness filter threshold — confirm against RefineMultiBoxLoss.
arm_criterion = RefineMultiBoxLoss(2, 0.5, True, 0, True, 3, 0.5, False)
odm_criterion = RefineMultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5,
                                   False, 0.01)
priorbox = PriorBox(cfg)
detector = Detect(num_classes, 0, cfg, object_score=0.01)
# NOTE(review): Variable(..., volatile=True) is deprecated; modern PyTorch
# would compute this under `with torch.no_grad():`.
priors = Variable(priorbox.forward(), volatile=True)
#dataset
print('Loading Dataset...')
if args.dataset == 'VOC':
    testset = VOCDetection(VOCroot, [('2007', 'test')], None,
                           AnnotationTransform())
    train_dataset = VOCDetection(VOCroot, train_sets,
                                 preproc(img_dim, rgb_means, p),
                                 AnnotationTransform())
elif args.dataset == 'COCO':
    testset = COCODetection(COCOroot, [('2014', 'minival')], None)
    train_dataset = COCODetection(COCOroot, train_sets,
                                  preproc(img_dim, rgb_means, p))
else:
    # NOTE(review): falls through with testset/train_dataset unbound —
    # any later use will raise NameError.
    print('Only VOC and COCO are supported now!')
def main():
    """Run detection over an image list and dump/visualize the results.

    Relies on module-level configuration (deform, multihead, bn, refine,
    ssd_dim, num_classes, trained_model, device, top_k, confidence_threshold,
    nms_threshold, cfg, img_set, img_root, dataset, backbone, save_dir,
    display, labelmap, class_name, det_list, results_file).
    """
    mean = (104, 117, 123)
    print('loading model!')
    # Pick the backbone builder; `deform` selects the deformable-conv variant.
    if deform:
        from model.dualrefinedet_vggbn import build_net
        net = build_net('test', size=ssd_dim, num_classes=num_classes,
                        c7_channel=1024, def_groups=deform,
                        multihead=multihead, bn=bn)
    else:
        from model.refinedet_vgg import build_net
        net = build_net('test', size=ssd_dim, num_classes=num_classes,
                        use_refine=refine, c7_channel=1024, bn=bn)
    net.load_state_dict(torch.load(trained_model))
    net.eval()
    print('Finished loading model!', trained_model)
    net = net.to(device)
    detector = Detect(num_classes, 0, top_k, confidence_threshold,
                      nms_threshold)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    for i, line in enumerate(open(img_set, 'r')):
        # if i==10:
        #     break
        # Each list line names one image; COCO/VOC encode the image id differently.
        if 'COCO' in dataset:
            image_name = line[:-1]
            image_id = int(image_name.split('_')[-1])
        elif 'VOC' in dataset:
            image_name = line[:-1]
            image_id = -1
        else:
            image_name, image_id = line.split(' ')
            image_id = image_id[:-1]
        print(i, image_name, image_id)
        image_path = os.path.join(img_root, image_name + '.jpg')
        image = cv2.imread(image_path, 1)
        h, w, _ = image.shape
        image_draw = cv2.resize(image.copy(), (640, 480))
        im_trans = base_transform(image, ssd_dim, mean)
        ######################## Detection ########################
        with torch.no_grad():
            # HWC -> NCHW for the network.
            x = torch.from_numpy(im_trans).unsqueeze(0).permute(0, 3, 1, 2).to(device)
            if 'RefineDet' in backbone and refine:
                arm_loc, _, loc, conf = net(x)
            else:
                loc, conf = net(x)
                arm_loc = None
            detections = detector.forward(loc, conf, priors,
                                          arm_loc_data=arm_loc)
        ############################################################
        out = list()
        # Class 0 is background — iterate real classes only.
        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            if dets.sum() == 0:
                continue
            # Keep rows whose score (column 0) is positive.
            mask = dets[:, 0].gt(0.).expand(dets.size(-1), dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, dets.size(-1))
            boxes = dets[:, 1:-1] if dets.size(-1) == 6 else dets[:, 1:]
            # Scale normalized box coordinates back to source-image pixels.
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            boxes_np = boxes.cpu().numpy()
            for b, s in zip(boxes_np, scores):
                if save_dir:
                    out.append([int(b[0]), int(b[1]), int(b[2]), int(b[3]),
                                j - 1, s])
                    if 'COCO' in dataset:
                        # COCO json format wants [x, y, width, height].
                        det_list.append({
                            'image_id': image_id,
                            'category_id': labelmap[j],
                            'bbox': [
                                float('{:.1f}'.format(b[0])),
                                float('{:.1f}'.format(b[1])),
                                float('{:.1f}'.format(b[2] - b[0] + 1)),
                                float('{:.1f}'.format(b[3] - b[1] + 1))
                            ],
                            'score': float('{:.2f}'.format(s))
                        })
                    else:
                        results_file.write(
                            str(image_id) + ' ' + str(j) + ' ' + str(s) + ' ' +
                            str(np.around(b[0], 2)) + ' ' +
                            str(np.around(b[1], 2)) + ' ' +
                            str(np.around(b[2], 2)) + ' ' +
                            str(np.around(b[3], 2)) + '\n')
                if display:
                    # Draw on the 640x480 preview copy, not the original.
                    cv2.rectangle(image_draw,
                                  (int(b[0] / w * 640), int(b[1] / h * 480)),
                                  (int(b[2] / w * 640), int(b[3] / h * 480)),
                                  (0, 255, 0), thickness=1)
                    cls = class_name[j] if 'COCO' in dataset else str(
                        labelmap[j - 1])
                    put_str = cls + ':' + str(np.around(s, decimals=2))
                    cv2.putText(
                        image_draw, put_str,
                        (int(b[0] / w * 640), int(b[1] / h * 480) - 10),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5,
                        color=(0, 255, 0), thickness=1)
        if display:
            cv2.imshow('frame', image_draw)
            ch = cv2.waitKey(0)
            if ch == 115:  # 's' key: save this frame and its detections
                if save_dir:
                    print('save: ', line)
                    torch.save(out,
                               os.path.join(save_dir, '%s.pkl' % str(line[:-1])))
                    cv2.imwrite(os.path.join(save_dir, '%s.jpg' % str(line[:-1])),
                                image)
                    cv2.imwrite(os.path.join(save_dir, '%s_box.jpg' % str(line[:-1])),
                                image_draw)
    cv2.destroyAllWindows()
    if save_dir:
        if dataset == 'COCO':
            json.dump(det_list, results_file)
        results_file.close()
def train(cfg):
    """Train an M2Det-style detector from an mmcv-style config file path.

    Args:
        cfg (str): path to the config file (parsed with Config.fromfile).

    Uses module-level `device`, `checkpoint_path`, `start_epoch`, `logging`
    and project helpers (build_net, init_net, anchors, PriorBox, Detect,
    get_dataloader, detection_collate, adjust_learning_rate_helmet).
    """
    cfg = Config.fromfile(cfg)
    net = build_net('train',
                    size=cfg.model.input_size,  # Only 320, 512, 704 and 800 are supported
                    config=cfg.model.m2det_config)
    init_net(net, cfg, False)
    net.to(device)
    # Resume from the starting epoch's checkpoint when one exists.
    if os.path.exists(checkpoint_path.format(start_epoch)):
        checkpoints = torch.load(checkpoint_path.format(start_epoch))
        net.load_state_dict(checkpoints)
        logging.info('checkpoint loaded.')
    optimizer = optim.SGD(net.parameters(),
                          lr=cfg.train_cfg.lr[0],
                          momentum=cfg.optimizer.momentum,
                          weight_decay=cfg.optimizer.weight_decay)
    criterion = MultiBoxLoss(cfg.model.m2det_config.num_classes,
                             overlap_thresh=cfg.loss.overlap_thresh,
                             prior_for_matching=cfg.loss.prior_for_matching,
                             bkg_label=cfg.loss.bkg_label,
                             neg_mining=cfg.loss.neg_mining,
                             neg_pos=cfg.loss.neg_pos,
                             neg_overlap=cfg.loss.neg_overlap,
                             encode_target=cfg.loss.encode_target)
    priorbox = PriorBox(anchors(cfg))
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    net.train()
    anchor_config = anchors(cfg)
    # Detector is only needed for (currently commented-out) mid-training snapshots.
    detector = Detect(cfg.model.m2det_config.num_classes,
                      cfg.loss.bkg_label, anchor_config)
    logging.info('detector initiated.')
    dataset = get_dataloader(cfg, 'Helmet', 'train_sets')
    train_ds = DataLoader(dataset, cfg.train_cfg.per_batch_size,
                          shuffle=True, num_workers=0,
                          collate_fn=detection_collate)
    logging.info('dataset loaded, start to train...')
    for epoch in range(start_epoch, cfg.model.epochs):
        for i, data in enumerate(train_ds):
            try:
                lr = adjust_learning_rate_helmet(optimizer, epoch, cfg)
                images, targets = data
                images = images.to(device)
                targets = [anno.to(device) for anno in targets]
                out = net(images)
                optimizer.zero_grad()
                loss_l, loss_c = criterion(out, priors, targets)
                loss = loss_l + loss_c
                loss.backward()
                optimizer.step()
                if i % 30 == 0:
                    logging.info('Epoch: {}, iter: {}, loc_loss: {}, conf_loss: {}, loss: {}, lr: {}'.format(
                        epoch, i, loss_l.item(), loss_c.item(), loss.item(), lr
                    ))
                # Periodic mid-epoch checkpoint every 2000 iterations.
                if i % 2000 == 0:
                    # two_imgs = images[0:2, :]
                    # out = net(two_imgs)
                    # snap_middle_result(two_imgs[0], out[0], priors, detector, cfg, epoch)
                    torch.save(net.state_dict(), checkpoint_path.format(epoch))
                    logging.info('model saved.')
            except KeyboardInterrupt:
                # Save progress on Ctrl-C before exiting.
                torch.save(net.state_dict(), checkpoint_path.format(epoch))
                logging.info('model saved.')
                exit(0)
        # End-of-epoch checkpoint.
        torch.save(net.state_dict(), checkpoint_path.format(epoch))
new_state_dict[name] = v net.load_state_dict(new_state_dict) # 将读取的模型参数,灌入net中 net.eval() # 现在模型参数就有啦,可以进入评估模式了 print('Finished loading model!') print(net) # load data,加载测试数据 if args.dataset == 'VOC': testset = VOCDetection( VOCroot, [('2007', 'test')], None, AnnotationTransform()) elif args.dataset == 'COCO': testset = COCODetection( COCOroot, [('2014', 'minival')], None) #COCOroot, [('2015', 'test-dev')], None) else: print('Only VOC and COCO dataset are supported now!') if args.cuda: net = net.cuda() cudnn.benchmark = True else: net = net.cpu() # evaluation #top_k = (300, 200)[args.dataset == 'COCO'] top_k = 200 # 每张图像上最多检出top_k个bbox detector = Detect(num_classes,0,cfg) # 调用detection.py里的Detect类,完成forward操作的detector save_folder = os.path.join(args.save_folder,args.dataset) rgb_means = ((104, 117, 123),(103.94,116.78,123.68))[args.version == 'RFB_mobile'] test_net(save_folder, net, detector, args.cuda, testset, BaseTransform(net.size, rgb_means, (2, 0, 1)), # resize + 减均值 + 通道调换 top_k, thresh=0.01) # thresh=0.01,为什么这么小?可以结合mAP介绍的笔记
def train():
    """Main SSD training loop with periodic checkpointing, evaluation and
    visdom loss plotting.

    Uses module-level state: net, args, train_dataset, testset, optimizer,
    criterion, priors, num_classes, cfg, batch_size, save_folder,
    test_save_dir, log_file_path, rgb_means, rgb_std, viz.

    NOTE(review): this function uses pre-0.4 PyTorch idioms (Variable,
    volatile=True, loss.data[0]); it will not run on modern torch without
    porting — left as-is to match the rest of the file.
    """
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0
    if args.resume_net:
        epoch = 0 + args.resume_epoch
    epoch_size = len(train_dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size
    # Learning-rate decay milestones (in iterations) differ per dataset.
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', train_dataset.name)
    '''
    n_flops, n_convops, n_params = measure_model(net, int(args.size), int(args.size))
    print('==> FLOPs: {:.4f}M, Conv_FLOPs: {:.4f}M, Params: {:.4f}M'.
          format(n_flops / 1e6, n_convops / 1e6, n_params / 1e6))
    '''
    print(' Total params: %.2fM' % (sum(p.numel()
                                        for p in net.parameters()) / 1000000.0))
    step_index = 0
    if args.visdom:
        # initialize visdom loss plot
        lot = viz.line(X=torch.zeros((1, )).cpu(),
                       Y=torch.zeros((1, 3)).cpu(),
                       opts=dict(xlabel='Iteration',
                                 ylabel='Loss',
                                 title='Current SSD Training Loss',
                                 legend=['Loc Loss', 'Conf Loss', 'Loss']))
        epoch_lot = viz.line(X=torch.zeros((1, )).cpu(),
                             Y=torch.zeros((1, 3)).cpu(),
                             opts=dict(
                                 xlabel='Epoch',
                                 ylabel='Loss',
                                 title='Epoch SSD Training Loss',
                                 legend=['Loc Loss', 'Conf Loss', 'Loss']))
    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0
    lr = args.lr
    log_file = open(log_file_path, 'w')
    for iteration in range(start_iter, max_iter):
        # Epoch boundary: rebuild the batch iterator, reset counters,
        # and run the periodic save / evaluation hooks.
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(
                data.DataLoader(train_dataset, batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            loc_loss = 0
            conf_loss = 0
            if epoch % args.save_frequency == 0 and epoch > 0:
                torch.save(
                    net.state_dict(),
                    os.path.join(
                        save_folder, args.version + '_' + args.dataset +
                        '_epoches_' + repr(epoch) + '.pth'))
            if epoch % args.test_frequency == 0 and epoch > 0:
                net.eval()
                top_k = 200
                detector = Detect(num_classes, 0, cfg)
                if args.dataset == 'VOC':
                    # VOC evaluation returns per-class APs and mAP for logging.
                    APs, mAP = test_net(test_save_dir, net, detector,
                                        args.cuda, testset,
                                        BaseTransform(net.module.size,
                                                      rgb_means, rgb_std,
                                                      (2, 0, 1)),
                                        top_k, thresh=0.01)
                    APs = [str(num) for num in APs]
                    mAP = str(mAP)
                    log_file.write(str(iteration) + ' APs:\n' + '\n'.join(APs))
                    log_file.write('mAP:\n' + mAP + '\n')
                else:
                    test_net(test_save_dir, net, detector, args.cuda, testset,
                             BaseTransform(net.module.size, rgb_means,
                                           rgb_std, (2, 0, 1)),
                             top_k, thresh=0.01)
                net.train()
            epoch += 1
        load_t0 = time.time()
        # Recover the LR step index (e.g. after resuming mid-schedule).
        for iter_tmp in range(iteration, 0, -epoch_size * args.save_frequency):
            if iter_tmp in stepvalues:
                step_index = stepvalues.index(iter_tmp) + 1
                if args.visdom:
                    viz.line(X=torch.ones((1, 3)).cpu() * epoch,
                             Y=torch.Tensor([
                                 loc_loss, conf_loss, loc_loss + conf_loss
                             ]).unsqueeze(0).cpu() / epoch_size,
                             win=epoch_lot,
                             update='append')
                break
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)
        # load train data
        images, targets = next(batch_iterator)
        if args.cuda:
            images = Variable(images.cuda())
            targets = [
                Variable(anno.cuda(), volatile=True) for anno in targets
            ]
        else:
            images = Variable(images)
            targets = [Variable(anno, volatile=True) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.data[0]
        conf_loss += loss_c.data[0]
        load_t1 = time.time()
        if iteration % 10 == 0:
            print(args.version + 'Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Totel iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f||' % (loss_l.data[0], loss_c.data[0]) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))
            log_file.write('Epoch:' + repr(epoch) + ' || epochiter: ' +
                           repr(iteration % epoch_size) + '/' +
                           repr(epoch_size) + '|| Totel iter ' +
                           repr(iteration) + ' || L: %.4f C: %.4f||' %
                           (loss_l.data[0], loss_c.data[0]) +
                           'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                           'LR: %.8f' % (lr) + '\n')
        if args.visdom and args.send_images_to_visdom:
            random_batch_index = np.random.randint(images.size(0))
            viz.image(images.data[random_batch_index].cpu().numpy())
        if args.visdom:
            viz.line(X=torch.ones((1, 3)).cpu() * iteration,
                     Y=torch.Tensor([
                         loss_l.data[0], loss_c.data[0],
                         loss_l.data[0] + loss_c.data[0]
                     ]).unsqueeze(0).cpu(),
                     win=lot,
                     update='append')
            if iteration % epoch_size == 0:
                viz.line(X=torch.zeros((1, 3)).cpu(),
                         Y=torch.Tensor(
                             [loc_loss, conf_loss,
                              loc_loss + conf_loss]).unsqueeze(0).cpu(),
                         win=epoch_lot,
                         update=True)
    log_file.close()
    torch.save(
        net.state_dict(),
        os.path.join(save_folder,
                     'Final_' + args.version + '_' + args.dataset + '.pth'))
# NOTE(review): trailing statement of the (truncated) plotting routine above —
# confirm its proper indentation against the original file.
plt.show()


if __name__ == "__main__":
    # Run FSSD (VOC, 21 classes, 300x300 input) detection over every file in image/.
    Image = os.listdir('image/')
    for img_name in Image:
        img = cv2.imread("image/" + img_name)
        model = 'fssd_voc_79_74.pth'
        net = build_net(300, 21)
        state_dict = torch.load(model)
        # Strip the 'module.' prefix that nn.DataParallel checkpoints carry.
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        net.load_state_dict(new_state_dict)
        net.eval()
        net = net.cuda()
        cudnn.benchmark = True
        print("Finished loading model")
        transform = BaseTransform(300, (104, 117, 123), (2, 0, 1))
        detector = Detect(21, 0, VOC_300)
        priorbox = PriorBox(VOC_300)
        with torch.no_grad():
            priors = priorbox.forward()
            priors = priors.cuda()
        test_net(net, img, img_name, detector, transform, priors,
                 top_k=200, thresh=0.4)
if head == 'module.': name = k[7:] # remove `module.` else: name = k new_state_dict[name] = v net.load_state_dict(new_state_dict) net.eval() print('Finished loading model!') if args.cuda: net = net.cuda() cudnn.benchmark = True else: net = net.cpu() detector = Detect(num_classes,0,cfg) transform = BaseTransform(img_dim, rgb_means, (2, 0, 1)) object_detector = ObjectDetector(net, detector, transform) img_list = os.listdir(args.img_dir) for i, img in enumerate(img_list): img_name = img img = os.path.join(args.img_dir, img) image = cv2.imread(img) detect_bboxes, tim = object_detector.predict(image) for class_id,class_collection in enumerate(detect_bboxes): if len(class_collection)>0: for i in range(class_collection.shape[0]): if class_collection[i,-1]>0.6:
def Model_Params(self, model_dir="output", use_gpu=True):
    '''
    User Function - Set Model Params

    Loads the Pelee/M2Det test configuration, builds and initializes the
    network, precomputes priors, and populates self.system_dict with the
    net, priors, preprocessor, detector, class labels and display colors.

    Args:
        model_dir (str): Directory containing config_final.py and the
            trained weights (VOC/Final_Pelee_VOC_size304.pth)
        use_gpu (bool): If True use GPU else run on CPU

    Returns:
        None
    '''
    # Rewrite the training config into a test config, forcing CPU if asked.
    # Context managers ensure the handles are closed (original leaked them).
    with open(model_dir + "/config_final.py", 'r') as f:
        lines = f.read()
    if not use_gpu:
        lines = lines.replace("cuda=True", "cuda=False")
    with open(model_dir + "/config_test.py", 'w') as f:
        f.write(lines)

    print("Loading model for inference")
    self.system_dict["cfg"] = Config.fromfile(model_dir + "/config_test.py")
    anchor_config = anchors(self.system_dict["cfg"].model)
    self.system_dict["priorbox"] = PriorBox(anchor_config)
    self.system_dict["net"] = build_net('test',
                                        self.system_dict["cfg"].model.input_size,
                                        self.system_dict["cfg"].model)
    init_net(self.system_dict["net"], self.system_dict["cfg"],
             model_dir + "/VOC/Final_Pelee_VOC_size304.pth")
    print_info('===> Finished constructing and loading model',
               ['yellow', 'bold'])
    self.system_dict["net"].eval()

    with torch.no_grad():
        self.system_dict["priors"] = self.system_dict["priorbox"].forward()
        if self.system_dict["cfg"].test_cfg.cuda:
            self.system_dict["net"] = self.system_dict["net"].cuda()
            self.system_dict["priors"] = self.system_dict["priors"].cuda()
            cudnn.benchmark = True
        else:
            self.system_dict["net"] = self.system_dict["net"].cpu()

    self.system_dict["_preprocess"] = BaseTransform(
        self.system_dict["cfg"].model.input_size,
        self.system_dict["cfg"].model.rgb_means, (2, 0, 1))
    self.system_dict["num_classes"] = self.system_dict["cfg"].model.num_classes
    self.system_dict["detector"] = Detect(self.system_dict["num_classes"],
                                          self.system_dict["cfg"].loss.bkg_label,
                                          anchor_config)
    print("Done....")

    print("Loading other params")
    # One display color per class, spread over a base^3 RGB cube.
    base = int(np.ceil(pow(self.system_dict["num_classes"], 1. / 3)))
    self.system_dict["colors"] = [
        self._to_color(x, base)
        for x in range(self.system_dict["num_classes"])
    ]
    cats = ['__background__']
    with open(self.system_dict["class_list"]) as f:
        class_lines = f.readlines()
    for raw in class_lines:
        # BUG FIX: the original tested `lines != ""` (a list compared to a
        # string — always True), so blank lines became empty category names;
        # it also chopped the last character of a final line lacking a
        # trailing newline. Strip the newline and skip empties instead.
        name = raw.rstrip('\n')
        if name != "":
            cats.append(name)
    self.system_dict["labels"] = cats
    print("Done....")