def __init__(self, phase, num_classes):
    """Build the BlazeFace backbone, multibox heads and anchor priors.

    Args:
        phase: 'train' or 'test'; 'test' additionally wires up softmax +
            Detect post-processing.
        num_classes: number of classes (including background).
    """
    super(BlazeFace, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    # Stem: 3x3 stride-2 conv halves the input resolution.
    self.conv_1 = nn.Conv2d(3, 24, kernel_size=3, stride=2, padding=1, bias=True)
    self.bn_1 = nn.BatchNorm2d(24)
    self.relu = nn.ReLU(inplace=True)
    self.blaze_1 = BlazeBlock(24, 24)
    self.blaze_2 = BlazeBlock(24, 24)
    self.blaze_3 = BlazeBlock(24, 48, stride=2)
    self.blaze_4 = BlazeBlock(48, 48)
    self.blaze_5 = BlazeBlock(48, 48)
    self.blaze_6 = BlazeBlock(48, 24, 96, stride=2)
    self.blaze_7 = BlazeBlock(96, 24, 96)
    self.blaze_8 = BlazeBlock(96, 24, 96)
    self.blaze_9 = BlazeBlock(96, 24, 96, stride=2)
    self.blaze_10 = BlazeBlock(96, 24, 96)
    self.blaze_11 = BlazeBlock(96, 24, 96)
    self.apply(initialize)
    # Multibox heads attached to the two deepest feature maps.
    self.head = mbox([self.blaze_9, self.blaze_10], [2, 6], 2)
    self.loc = nn.ModuleList(self.head[0])
    self.conf = nn.ModuleList(self.head[1])
    self.cfg = wider_face
    # print(self.cfg)
    self.priorbox = PriorBox(self.cfg)
    # FIX: Variable(..., volatile=True) was removed in PyTorch 0.4; the
    # priors are constants, so generate them under torch.no_grad() instead
    # (matches how other loaders in this file compute priors).
    with torch.no_grad():
        self.priors = self.priorbox.forward()
    if phase == 'test':
        self.softmax = nn.Softmax(dim=-1)
        self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
def get_model(cls):
    """Get the model object for this instance, loading it if it's not already loaded."""
    weights_path = '/opt/ml/model/m2det512_vgg.pth'
    # weights_path = '../../m2det512_vgg.pth'
    anchor_config = anchors(cfg)
    print_info('The Anchor info: \n{}'.format(anchor_config))
    box_generator = PriorBox(anchor_config)
    net = build_net('test',
                    size=cfg.model.input_size,
                    config=cfg.model.m2det_config)
    init_net(net, cfg, weights_path)
    print_info('===> Finished constructing and loading model', ['yellow', 'bold'])
    net.eval()
    # Priors are constant for a fixed config — compute once without autograd.
    with torch.no_grad():
        priors = box_generator.forward()
    if cfg.test_cfg.cuda:
        net = net.cuda()
        priors = priors.cuda()
        cudnn.benchmark = True
    else:
        net = net.cpu()
    preprocessor = BaseTransform(cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1))
    detector = Detect(cfg.model.m2det_config.num_classes,
                      cfg.loss.bkg_label, anchor_config)
    return net, priors, preprocessor, detector
def _create_optimization(self):
    """Create the SGD optimizer, multibox loss and constant anchor priors."""
    self.optimizer = optim.SGD(
        self.model.parameters(), lr=4e-3, weight_decay=0, momentum=0)
    self.criterion = MultiBoxLoss(
        self.num_classes, 0.5, True, 0, True, 3, 0.5, False).to(device)
    self.priorbox = PriorBox(self.cfg)
    # Priors never require gradients; generate them outside autograd.
    with torch.no_grad():
        priors = self.priorbox.forward()
    self.priors = priors.cuda() if torch.cuda.is_available() else priors
def __init__(self, num_class = 21, levels = 3, num_channels = 128, model_name = 'efficientnet-b0'):
    """Build EfficientDet: EfficientNet backbone + BiFPN + anchor priors.

    Args:
        num_class: number of classes; 21 selects the VOC config, else COCO.
        levels: number of BiFPN repetitions.
        num_channels: BiFPN feature width.
        model_name: pretrained EfficientNet variant to load.
    """
    super(EfficientDet, self).__init__()
    self.num_class = num_class
    self.levels = levels
    self.num_channels = num_channels
    self.efficientnet = EfficientNet.from_pretrained(model_name)
    print('efficientnet: ', self.efficientnet)
    self.bifpn = BiFPN(num_channels = self.num_channels)
    # Index with the boolean: True -> voc, False -> coco.
    self.cfg = (coco, voc)[num_class == 21]
    self.priorbox = PriorBox(self.cfg)
    # FIX: Variable(..., volatile=True) was removed in PyTorch 0.4;
    # compute the constant priors under torch.no_grad() instead.
    with torch.no_grad():
        self.priors = self.priorbox.forward()
class Pelee_Det(object):
    """Pelee detector wrapper: builds/loads the net once, detects per image."""

    def __init__(self):
        self.anchor_config = anchors(cfg.model)
        self.priorbox = PriorBox(self.anchor_config)
        self.net = build_net('test', cfg.model.input_size, cfg.model)
        init_net(self.net, cfg, args.trained_model)
        self.net.eval()
        self.num_classes = cfg.model.num_classes
        with torch.no_grad():
            self.priors = self.priorbox.forward()
            self.net = self.net.cuda()
            self.priors = self.priors.cuda()
            cudnn.benchmark = True
        self._preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1))
        # FIX: was `Detect(num_classes, ...)` — `num_classes` is not defined in
        # this scope (NameError unless a global happened to exist); use the
        # attribute assigned just above.
        self.detector = Detect(self.num_classes, cfg.loss.bkg_label, self.anchor_config)

    def detect(self, image):
        """Detect objects in a BGR image; returns (infos, annotated image)."""
        loop_start = time.time()
        w, h = image.shape[1], image.shape[0]
        img = self._preprocess(image).unsqueeze(0)
        if cfg.test_cfg.cuda:
            img = img.cuda()
        scale = torch.Tensor([w, h, w, h])
        out = self.net(img)
        boxes, scores = self.detector.forward(out, self.priors)
        boxes = (boxes[0] * scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []
        # Per-class thresholding + NMS; class 0 is background and skipped.
        # for j in [2, 6, 7, 14, 15]:
        for j in range(1, len(ch_labels)):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32, copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([_.tolist() + [j] for _ in c_dets])
        loop_time = time.time() - loop_start
        # FIX: reshape(-1, 6) keeps the column slices below valid when no
        # detection survives thresholding (np.array([]) is 1-D and would
        # previously raise on `allboxes[:, :4]`).
        allboxes = np.array(allboxes).reshape(-1, 6)
        boxes = allboxes[:, :4]
        scores = allboxes[:, 4]
        cls_inds = allboxes[:, 5]
        infos, im2show = draw_detection(image, boxes, scores, cls_inds, -1, args.thresh)
        return infos, im2show
class EfficientDet(nn.Module):
    """EfficientDet-style detector: EfficientNet backbone + BiFPN + SSD-style
    (loc, conf, priors) output tuple.

    NOTE(review): `forward`, `class_net` and `regression_net` construct new
    nn.Conv2d / nn.Sequential layers on EVERY call. Those layers have fresh
    random weights each time and are never registered with the module, so
    they are untrained and invisible to `.parameters()` / `.to()` — confirm
    whether this is intentional prototype code before relying on it.
    """

    def __init__(self, num_class = 21, levels = 3, num_channels = 128, model_name = 'efficientnet-b0'):
        super(EfficientDet, self).__init__()
        self.num_class = num_class
        self.levels = levels
        self.num_channels = num_channels
        self.efficientnet = EfficientNet.from_pretrained(model_name)
        print('efficientnet: ', self.efficientnet)
        self.bifpn = BiFPN(num_channels = self.num_channels)
        # True -> voc, False -> coco.
        self.cfg = (coco, voc)[num_class == 21]
        self.priorbox = PriorBox(self.cfg)
        # NOTE(review): Variable(volatile=True) was removed in PyTorch 0.4;
        # this line raises on modern PyTorch.
        self.priors = Variable(self.priorbox.forward(), volatile=True)

    def forward(self, inputs):
        # Backbone returns a 7-level feature pyramid; only P3..P7 are used.
        P1, P2, P3, P4, P5, P6, P7 = self.efficientnet(inputs)
        # NOTE(review): lateral 1x1 convs are created per call (see class note).
        P3 = self.bifpn.Conv(in_channels=P3.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P3)
        P4 = self.bifpn.Conv(in_channels=P4.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P4)
        P5 = self.bifpn.Conv(in_channels=P5.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P5)
        P6 = self.bifpn.Conv(in_channels=P6.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P6)
        P7 = self.bifpn.Conv(in_channels=P7.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P7)
        # Repeat BiFPN fusion `levels` times.
        for _ in range(self.levels):
            P3, P4, P5, P6, P7 = self.bifpn([P3, P4, P5, P6, P7])
        P = [P3, P4, P5, P6, P7]
        features_class = [self.class_net(p, self.num_class) for p in P]
        features_class = torch.cat(features_class, axis=0)
        features_bbox = [self.regression_net(p) for p in P]
        features_bbox = torch.cat(features_bbox, axis=0)
        # SSD-style output triple: (loc, conf, priors).
        output = (
            features_bbox.view(inputs.size(0), -1, 4),
            features_class.view(inputs.size(0), -1, self.num_class),
            self.priors
        )
        return output

    @staticmethod
    def class_net(features, num_class, num_anchor=5):
        # NOTE(review): head layers created per call with random weights.
        features = nn.Sequential(
            nn.Conv2d(in_channels=features.size(1), out_channels=features.size(2), kernel_size = 3, stride=1),
            nn.Conv2d(in_channels=features.size(2), out_channels=num_anchor*num_class, kernel_size = 3, stride=1)
        )(features)
        features = features.view(-1, num_class)
        features = nn.Sigmoid()(features)
        return features

    @staticmethod
    def regression_net(features, num_anchor=5):
        # NOTE(review): head layers created per call with random weights.
        features = nn.Sequential(
            nn.Conv2d(in_channels=features.size(1), out_channels=features.size(2), kernel_size = 3, stride=1),
            nn.Conv2d(in_channels=features.size(2), out_channels=num_anchor*4, kernel_size = 3, stride=1)
        )(features)
        features = features.view(-1, 4)
        features = nn.Sigmoid()(features)
        return features
def __init__(self):
    """CPU variant: build the Pelee net, load weights, precompute priors."""
    self.anchor_config = anchors(cfg.model)
    self.priorbox = PriorBox(self.anchor_config)
    self.net = build_net('test', cfg.model.input_size, cfg.model)
    init_net(self.net, cfg, args.trained_model)
    self.net.eval()
    self.num_classes = cfg.model.num_classes
    with torch.no_grad():
        self.priors = self.priorbox.forward()
        # self.net = self.net.cuda()
        # self.priors = self.priors.cuda()
        cudnn.benchmark = True
    self._preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1))
    # FIX: was `Detect(num_classes, ...)` — `num_classes` is undefined in this
    # scope; the value lives on `self.num_classes` (assigned above).
    self.detector = Detect(self.num_classes, cfg.loss.bkg_label, self.anchor_config)
def __init__(self, img_size=300, thresh=0.56):
    """Build the detection net for 300/512 input, load weights, make priors."""
    assert img_size in (300, 512), 'net input image size must be 300 or 512'
    self.labels_name = LABELS_SET
    self.labels_numb = len(LABELS_SET)
    self.img_size = img_size
    self.cfg = VOC_300 if img_size == 300 else VOC_512
    self.thresh = thresh
    self.gpu_is_available = torch.cuda.is_available()
    self.gpu_numb = torch.cuda.device_count()
    self.net = build_net('test', self.img_size, self.labels_numb)
    self.detect = Detect(self.labels_numb, 0, self.cfg)
    self.transform = BaseTransform(self.img_size)
    # Load trained weights, stripping any DataParallel 'module.' prefix.
    raw_state = torch.load(trained_model, map_location='cpu')
    cleaned_state = OrderedDict(
        (key[7:] if key.startswith('module.') else key, value)
        for key, value in raw_state.items())
    self.net.load_state_dict(cleaned_state)
    self.net.eval()
    print('Finished loading model!')
    if self.gpu_numb > 1:
        self.net = torch.nn.DataParallel(
            self.net, device_ids=list(range(self.gpu_numb)))
    # Move the net to GPU when one is present.
    if self.gpu_is_available:
        self.net.cuda()
        cudnn.benchmark = True
    # Anchor boxes are constant for a fixed config — generate once.
    with torch.no_grad():
        anchor_boxes = PriorBox(self.cfg).forward()
    self.priors = anchor_boxes.cuda() if self.gpu_is_available else anchor_boxes
def __init__(self, num_class=21, levels=3, num_channels=128, model_name='efficientnet-b0'):
    """Build EfficientDet: backbone, per-level heads, BiFPN and priors.

    Args:
        num_class: number of classes; 21 selects the VOC config, else COCO.
        levels: number of BiFPN repetitions (kept for compatibility).
        num_channels: BiFPN feature width.
        model_name: pretrained EfficientNet variant to load.
    """
    super(EfficientDet, self).__init__()
    self.num_class = num_class
    self.levels = levels
    self.num_channels = num_channels
    self.efficientnet = EfficientNet.from_pretrained(model_name)
    # True -> voc, False -> coco.
    self.cfg = (coco, voc)[num_class == 21]
    self.priorbox = PriorBox(self.cfg)
    # FIX: Variable(..., volatile=True) was removed in PyTorch 0.4.
    with torch.no_grad():
        self.priors = self.priorbox.forward()
    self.num_anchor = 9
    # FIX: plain Python lists hide these heads from .parameters(), .to(),
    # .cuda() and state_dict(); nn.ModuleList registers them properly while
    # still supporting append/indexing/iteration.
    self.class_module = nn.ModuleList()
    self.regress_module = nn.ModuleList()
    for _ in range(3, 8):  # one classification + one regression head per level P3..P7
        self.class_module.append(
            nn.Sequential(
                nn.Conv2d(in_channels=self.num_channels, out_channels=64, kernel_size=2, stride=1),
                nn.Conv2d(in_channels=64, out_channels=self.num_anchor * num_class, kernel_size=2, stride=1)))
        self.regress_module.append(
            nn.Sequential(
                nn.Conv2d(in_channels=self.num_channels, out_channels=64, kernel_size=2, stride=1),
                nn.Conv2d(in_channels=64, out_channels=self.num_anchor * 4, kernel_size=2, stride=1)))
    self.BIFPN = BIFPN(in_channels=[40, 80, 112, 192, 320],
                       out_channels=self.num_channels,
                       num_outs=5)
    self.sigmoid = nn.Sigmoid()
def __init__(self, phase, size, base, extras, head, num_classes):
    """Assemble the SSD network from a prebuilt VGG base, extras and heads.

    Args:
        phase: 'train' or 'test'; 'test' adds softmax + Detect post-processing.
        size: input image size.
        base: list of VGG layers.
        extras: list of extra feature layers.
        head: (loc_layers, conf_layers) pair of multibox head lists.
        num_classes: number of classes; 21 selects the VOC config, else COCO.
    """
    super(SSD, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    self.cfg = (coco, voc)[num_classes == 21]
    self.priorbox = PriorBox(self.cfg)
    # FIX: Variable(..., volatile=True) was removed in PyTorch 0.4; the
    # priors are constants, so generate them under torch.no_grad().
    with torch.no_grad():
        self.priors = self.priorbox.forward()
    self.size = size
    # SSD network
    self.vgg = nn.ModuleList(base)
    # Layer learns to scale the l2 normalized features from conv4_3
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)
    self.loc = nn.ModuleList(head[0])
    self.conf = nn.ModuleList(head[1])
    if phase == 'test':
        self.softmax = nn.Softmax(dim=-1)
        self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
def _init_model(self):
    """Build the RFB-VGG net for the configured input size and load weights.

    Sets self.img_dim, self.priors, self.net and self.detector.
    """
    # FIX: `cuda` was only bound inside `if torch.cuda.is_available():`,
    # causing a NameError at the `if cuda:` checks below on CPU-only hosts.
    cuda = torch.cuda.is_available()
    # Input size is inferred from the checkpoint filename.
    if '300' in self.model_path:
        cfg = COCO_300
        self.img_dim = 300
        print('Model input size is 300')
    else:
        cfg = COCO_512
        self.img_dim = 512
        print('Model input size is 512')
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
    # FIX: previously self.priors was assigned only on the CUDA path,
    # leaving it unset on CPU-only machines.
    self.priors = priors.cuda() if cuda else priors
    self.net = build_rfb_vgg_net('test', self.img_dim, self.num_classes)  # initialize detector
    state_dict = torch.load(self.model_path)['state_dict']
    # create new OrderedDict that does not contain `module.`
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        head = k[:7]
        if head == 'module.':
            name = k[7:]  # remove `module.`
        else:
            name = k
        new_state_dict[name] = v
    self.net.load_state_dict(new_state_dict)
    self.net.eval()
    if cuda:
        self.net = self.net.cuda()
        cudnn.benchmark = True
    else:
        self.net = self.net.cpu()
    print('Finished loading model!')
    # print(net)
    self.detector = Detect(self.num_classes, 0, cfg)
def im_detect(net, im_org, target_size, transform, cuda, means):
    """Resize/normalize an image, run `net`, and return (boxes, scores)
    scaled back to the original image size.

    NOTE(review): relies on module-level globals `detector`, `cfg`,
    `VOC_512`, and mutates the global `cfg` in-place — verify callers expect
    that side effect. It defines `cfg_temp = VOC_512` but then writes into
    `cfg` (not `cfg_temp`); the mismatch looks unintentional.
    """
    # im = cv2.resize(im_org,target_size,target_size,3)
    im = cv2.resize(np.array(im_org), (target_size, target_size),
                    interpolation=cv2.INTER_LINEAR).astype(np.float32)
    im -= means
    # HWC -> CHW for the network.
    im = im.transpose((2, 0, 1))
    # Scale factors to map normalized boxes back to original pixels.
    scale = torch.Tensor(
        [im_org.shape[1], im_org.shape[0], im_org.shape[1], im_org.shape[0]])
    # NOTE(review): Variable(volatile=True) was removed in PyTorch 0.4;
    # this raises on modern PyTorch.
    x = Variable((torch.from_numpy(im)).unsqueeze(0), volatile=True)
    if cuda:
        x = x.cuda()
        scale = scale.cuda()
    out = net(x)
    cfg_temp = VOC_512
    # NOTE(review): writes to the global `cfg`, not `cfg_temp` — confirm intent.
    cfg['min_dim'] = target_size
    size = math.ceil(target_size / 4)
    multi = target_size / 300
    # Overwrite feature-map sizes with those the net actually produced.
    for i in range(0, len(cfg['feature_maps'])):
        size = net.sizes[i]
        cfg['feature_maps'][i] = size
    # for i in range(0,len(cfg['min_sizes'])):
    #     cfg['min_sizes'][i] *= multi
    #     cfg['max_sizes'][i] *= multi
    priorbox_temp = PriorBox(cfg_temp)
    # NOTE(review): unconditional .cuda() ignores the `cuda` parameter.
    priors_temp = priorbox_temp.forward().cuda()
    priors_temp = Variable(priors_temp, volatile=True)
    boxes, scores = detector.forward(out, priors_temp)
    boxes = boxes[0]
    scores = scores[0]
    # scale = target_size
    boxes *= scale
    boxes = boxes.cpu().numpy()
    scores = scores.cpu().numpy()
    return (boxes, scores)
def __init__(self):
    """Build the Qt main window, wire the open-image action, and load the
    RFB detection network onto the GPU."""
    super(RFB_GUI, self).__init__()
    MyMessageBox(self)
    self.setWindowTitle("RFB-GUI Demo Program")
    self.resize(1280, 900)
    self.setFocus()
    # Ctrl+O menu action for selecting an image file.
    self.file_item = QtWidgets.QAction('Open image', self)
    self.file_item.setShortcut('Ctrl+O')
    self.file_item.triggered.connect(self.select_file)
    # Central drag-and-drop target.
    self.label = DragLabel(
        "Please drag image here\nor\nPress Ctrl+O to select", self)
    self.label.addAction(self.file_item)
    self.setCentralWidget(self.label)
    # NOTE(review): self.cfg, self.input_size and self.trained_model are not
    # assigned in this __init__ — presumably class attributes; verify.
    self.priorbox = PriorBox(self.cfg)
    self.cuda = True
    self.numclass = 21
    self.net = build_net('test', self.input_size, self.numclass)
    # initialize detector: load weights, stripping any DataParallel
    # 'module.' prefix from the checkpoint keys.
    state_dict = torch.load(self.trained_model)
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        head = k[:7]
        if head == 'module.':
            name = k[7:]
        else:
            name = k
        new_state_dict[name] = v
    self.net.load_state_dict(new_state_dict)
    self.net.eval()
    if self.cuda:
        self.net = self.net.cuda()
        cudnn.benchmark = True
    else:
        self.net = self.net.cpu()
    print('Finished loading model!')
net = build_net( 'train', size=cfg.model.input_size, # Only 320, 512, 704 and 800 are supported config=cfg.model.m2det_config) init_net(net, cfg, args.resume_net ) # init the network with pretrained weights or resumed weights if args.ngpu > 1: net = torch.nn.DataParallel(net) if cfg.train_cfg.cuda: net.cuda() cudnn.benchmark = True optimizer = set_optimizer(net, cfg) criterion = set_criterion(cfg) priorbox = PriorBox(anchors(cfg)) with torch.no_grad(): priors = priorbox.forward() if cfg.train_cfg.cuda: priors = priors.cuda() if __name__ == '__main__': net.train() epoch = args.resume_epoch print_info('===> Loading Dataset...', ['yellow', 'bold']) dataset = get_dataloader(cfg, args.dataset, 'train_sets') epoch_size = len(dataset) // (cfg.train_cfg.per_batch_size * args.ngpu) max_iter = getattr(cfg.train_cfg.step_lr, args.dataset)[-1] * epoch_size stepvalues = [ _ * epoch_size
new_state_dict[name] = v net.load_state_dict(new_state_dict) if args.ngpu > 1: net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu))) if args.cuda: net.cuda() cudnn.benchmark = True detector = Detect(num_classes, 0, cfg) optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False) priorbox = PriorBox(cfg) priors = Variable(priorbox.forward()) # dataset print('Loading Dataset...') if args.dataset == 'VOC': testset = VOCDetection( VOCroot, [('2007', 'test')], None, AnnotationTransform()) train_dataset = VOCDetection(VOCroot, train_sets, preproc( img_dim, rgb_means, rgb_std, p), AnnotationTransform()) elif args.dataset == 'COCO': testset = COCODetection( COCOroot, [('2017', 'val')], None) #testset = COCODetection(COCOroot, [('2017', 'test-dev')], None) train_dataset = COCODetection(COCOroot, train_sets, preproc( img_dim, rgb_means, rgb_std, p)) else:
logger = set_logger(args.tensorboard) global cfg cfg = Config.fromfile(args.config) net = get_network(build_net, cfg, args.dataset) init_net(net, cfg, args.resume_net) # init the network with pretrained # weights or resumed weights if args.ngpu > 1: net = torch.nn.DataParallel(net) if cfg.train_cfg.cuda: net.cuda() cudnn.benckmark = True optimizer = set_optimizer(net, cfg) criterion = set_criterion(cfg, args.dataset) priorbox = PriorBox(anchors(cfg.model, args.dataset)) with torch.no_grad(): priors = priorbox.forward() if cfg.train_cfg.cuda: priors = priors.cuda() if __name__ == '__main__': net.train() epoch = args.resume_epoch print_info('===> Loading Dataset...', ['yellow', 'bold']) dataset = get_dataloader(cfg, args.dataset, 'train_sets') epoch_size = len(dataset) // (cfg.train_cfg.per_batch_size * args.ngpu) max_iter = getattr(cfg.train_cfg.step_lr, args.dataset)[-1] * epoch_size stepvalues = [ _ * epoch_size
def handler(context):
    """ABEJA-platform training entry point: load datasets, build SSD,
    train for `max_iter` iterations, and save the final weights.

    NOTE(review): relies on module-level globals (min_dim, MEANS, PARAMS,
    device, num_classes, lr, batch_size, max_iter, lr_steps, writer,
    ABEJA_TRAINING_RESULT_DIR) defined elsewhere in the file.
    """
    # Resolve dataset ids from the platform context and download them.
    dataset_alias = context.datasets
    trainval_dataset_id = dataset_alias['trainval']
    test_dataset_id = dataset_alias['test']
    trainval_dataset = list(load_dataset_from_api(trainval_dataset_id))
    test_dataset = list(load_dataset_from_api(test_dataset_id))
    trainval = DetectionDatasetFromAPI(trainval_dataset,
                                       transform=SSDAugmentation(min_dim, MEANS))
    test = DetectionDatasetFromAPI(test_dataset,
                                   transform=SSDAugmentation(min_dim, MEANS))
    train_dataset = trainval
    test_dataset = test
    # Anchors are constant; generate once without autograd.
    priorbox = PriorBox(min_dim, PARAMS)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    ssd_net = build_ssd('train', priors, min_dim, num_classes)
    ssd_net = ssd_net.to(device)
    # Download and load the reduced VGG16 backbone weights.
    url = 'https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth'
    weight_file = os.path.join(ABEJA_TRAINING_RESULT_DIR, 'vgg16_reducedfc.pth')
    download(url, weight_file)
    vgg_weights = torch.load(weight_file)
    print('Loading base network...')
    ssd_net.vgg.load_state_dict(vgg_weights)
    optimizer = optim.SGD(ssd_net.parameters(), lr=lr, momentum=0.9,
                          weight_decay=5e-4)
    criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False,
                             PARAMS['variance'], device)
    # loss counters
    step_index = 0
    trainloader = data.DataLoader(train_dataset, batch_size, num_workers=0,
                                  shuffle=True,
                                  collate_fn=tools.detection_collate,
                                  pin_memory=True)
    testloader = data.DataLoader(test_dataset, batch_size, num_workers=0,
                                 shuffle=False,
                                 collate_fn=tools.detection_collate,
                                 pin_memory=True)
    # create batch iterator
    iteration = 1
    while iteration <= max_iter:
        ssd_net.train()
        for images, targets in trainloader:
            if iteration > max_iter:
                break
            # Step the learning rate at the configured iteration milestones.
            if iteration in lr_steps:
                step_index += 1
                adjust_learning_rate(optimizer, 0.1, step_index)
            # load train data
            images = images.to(device)
            targets = [ann.to(device) for ann in targets]
            # forward
            out = ssd_net(images)
            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            if iteration % 100 == 0:
                print('[Train] iter {}, loss: {:.4f}'.format(
                    iteration, loss.item()))
                statistics(iteration, loss.item(), None, None, None)
                writer.add_scalar('main/loss', loss.item(), iteration)
                writer.add_scalar('main/loc_loss', loss_l.item(), iteration)
                writer.add_scalar('main/conf_loss', loss_c.item(), iteration)
            if iteration % 10000 == 0:
                # NOTE(review): `eval` here is a project helper that shadows
                # the builtin — presumably runs validation; verify.
                eval(testloader, ssd_net, criterion, iteration)
                ssd_net.train()
            iteration += 1
    torch.save(ssd_net.state_dict(),
               os.path.join(ABEJA_TRAINING_RESULT_DIR, 'model.pth'))
def demo(v_f):
    """Run M2Det detection over video file `v_f`, render boxes per frame and
    write the annotated stream to result.mp4."""
    cfg = Config.fromfile(config_f)
    anchor_config = anchors(cfg)
    priorbox = PriorBox(anchor_config)
    net = build_net('test',
                    size=cfg.model.input_size,
                    config=cfg.model.m2det_config)
    init_net(net, cfg, checkpoint_path)
    net.eval().to(device)
    # Anchors are constant for a fixed config; generate once.
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    _preprocess = BaseTransform(
        cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1))
    detector = Detect(cfg.model.m2det_config.num_classes,
                      cfg.loss.bkg_label, anchor_config)
    logging.info('detector initiated.')
    cap = cv2.VideoCapture(v_f)
    logging.info('detect on: {}'.format(v_f))
    logging.info('video width: {}, height: {}'.format(int(cap.get(3)),
                                                      int(cap.get(4))))
    # NOTE(review): .mp4 filename with an MJPG fourcc — confirm the
    # codec/container combination works in the deployment environment.
    out_video = cv2.VideoWriter("result.mp4",
                                cv2.VideoWriter_fourcc(*'MJPG'), 24,
                                (int(cap.get(3)), int(cap.get(4))))
    while True:
        ret, image = cap.read()
        if not ret:
            # End of stream: release everything and stop.
            out_video.release()
            cv2.destroyAllWindows()
            cap.release()
            break
        w, h = image.shape[1], image.shape[0]
        img = _preprocess(image).unsqueeze(0).to(device)
        scale = torch.Tensor([w, h, w, h])
        out = net(img)
        boxes, scores = detector.forward(out, priors)
        # Batch size is 1; scale normalized boxes back to pixel coordinates.
        boxes = (boxes[0]*scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []
        # Per-class score threshold + NMS; class 0 (background) skipped.
        for j in range(1, cfg.model.m2det_config.num_classes):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
                np.float32, copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            # min_thresh, device_id=0 if cfg.test_cfg.cuda else None)
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([_.tolist()+[j] for _ in c_dets])
        if len(allboxes) > 0:
            allboxes = np.array(allboxes)
            # [boxes, scores, label_id] -> [id, score, boxes] 0, 1, 2, 3, 4, 5
            allboxes = allboxes[:, [5, 4, 0, 1, 2, 3]]
            logging.info('allboxes shape: {}'.format(allboxes.shape))
            res = visualize_det_cv2(image, allboxes, classes=classes,
                                    thresh=0.2)
            # res = visualize_det_cv2_fancy(image, allboxes, classes=classes, thresh=0.2, r=4, d=6)
            cv2.imshow('rr', res)
            out_video.write(res)
            cv2.waitKey(1)
def train(cfg):
    """Train an M2Det-style net on the Helmet dataset, checkpointing each
    epoch (and every 2000 iterations, and on Ctrl-C).

    Args:
        cfg: path to a config file readable by Config.fromfile.
    """
    cfg = Config.fromfile(cfg)
    net = build_net('train',
                    size=cfg.model.input_size,  # Only 320, 512, 704 and 800 are supported
                    config=cfg.model.m2det_config)
    init_net(net, cfg, False)
    net.to(device)
    # Resume from an existing checkpoint for the starting epoch, if any.
    if os.path.exists(checkpoint_path.format(start_epoch)):
        checkpoints = torch.load(checkpoint_path.format(start_epoch))
        net.load_state_dict(checkpoints)
        logging.info('checkpoint loaded.')
    optimizer = optim.SGD(net.parameters(), lr=cfg.train_cfg.lr[0],
                          momentum=cfg.optimizer.momentum,
                          weight_decay=cfg.optimizer.weight_decay)
    criterion = MultiBoxLoss(cfg.model.m2det_config.num_classes,
                             overlap_thresh=cfg.loss.overlap_thresh,
                             prior_for_matching=cfg.loss.prior_for_matching,
                             bkg_label=cfg.loss.bkg_label,
                             neg_mining=cfg.loss.neg_mining,
                             neg_pos=cfg.loss.neg_pos,
                             neg_overlap=cfg.loss.neg_overlap,
                             encode_target=cfg.loss.encode_target)
    # Anchors are constant for a fixed config; generate once.
    priorbox = PriorBox(anchors(cfg))
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    net.train()
    anchor_config = anchors(cfg)
    detector = Detect(cfg.model.m2det_config.num_classes,
                      cfg.loss.bkg_label, anchor_config)
    logging.info('detector initiated.')
    dataset = get_dataloader(cfg, 'Helmet', 'train_sets')
    train_ds = DataLoader(dataset, cfg.train_cfg.per_batch_size, shuffle=True,
                          num_workers=0, collate_fn=detection_collate)
    logging.info('dataset loaded, start to train...')
    for epoch in range(start_epoch, cfg.model.epochs):
        for i, data in enumerate(train_ds):
            # try/except guards KeyboardInterrupt so a manual stop still
            # saves the current weights before exiting.
            try:
                lr = adjust_learning_rate_helmet(optimizer, epoch, cfg)
                images, targets = data
                images = images.to(device)
                targets = [anno.to(device) for anno in targets]
                out = net(images)
                optimizer.zero_grad()
                loss_l, loss_c = criterion(out, priors, targets)
                loss = loss_l + loss_c
                loss.backward()
                optimizer.step()
                if i % 30 == 0:
                    logging.info('Epoch: {}, iter: {}, loc_loss: {}, conf_loss: {}, loss: {}, lr: {}'.format(
                        epoch, i, loss_l.item(), loss_c.item(), loss.item(), lr
                    ))
                if i % 2000 == 0:
                    # two_imgs = images[0:2, :]
                    # out = net(two_imgs)
                    # snap_middle_result(two_imgs[0], out[0], priors, detector, cfg, epoch)
                    torch.save(net.state_dict(), checkpoint_path.format(epoch))
                    logging.info('model saved.')
            except KeyboardInterrupt:
                torch.save(net.state_dict(), checkpoint_path.format(epoch))
                logging.info('model saved.')
                exit(0)
    # Final save after the last epoch.
    torch.save(net.state_dict(), checkpoint_path.format(epoch))
# Training setup: wrap the net for multi-GPU / CUDA, build optimizer,
# criterion, constant priors, and the train/test datasets.
if args.gpu_id:
    net = torch.nn.DataParallel(net, device_ids=args.gpu_id)
if args.cuda:
    net.cuda()
    cudnn.benchmark = True
optimizer = optim.SGD(net.parameters(), lr=args.lr,
                      momentum=args.momentum, weight_decay=args.weight_decay)
#optimizer = optim.RMSprop(net.parameters(), lr=args.lr,alpha = 0.9, eps=1e-08,
#                          momentum=args.momentum, weight_decay=args.weight_decay)
criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)
priorbox = PriorBox(cfg)
# FIX: Variable(..., volatile=True) was removed in PyTorch 0.4; the priors
# are constants, so compute them under torch.no_grad() instead.
with torch.no_grad():
    priors = priorbox.forward()
#dataset
print('Loading Dataset...')
if args.dataset == 'VOC':
    testset = VOCDetection(
        VOCroot, [('2007', 'test')], None, AnnotationTransform())
    train_dataset = VOCDetection(VOCroot, train_sets, preproc(
        img_dim, rgb_means, p, rgb_std), AnnotationTransform())
elif args.dataset == 'COCO':
    testset = COCODetection(COCOroot, [('2014', 'minival')], None)
    train_dataset = COCODetection(COCOroot, train_sets, preproc(
        img_dim, rgb_means, p, rgb_std))
else:
    print('Only VOC and COCO are supported now!')
plt.show()  # NOTE(review): appears to be the tail of a plotting helper above — confirm placement

if __name__ == "__main__":
    # FIX: the model build, weight load, transform, detector and priors were
    # previously re-created INSIDE the per-image loop, reloading the
    # checkpoint from disk for every image. All of this is loop-invariant,
    # so it is now done exactly once before iterating the image folder.
    model = 'fssd_voc_79_74.pth'
    net = build_net(300, 21)
    state_dict = torch.load(model)
    from collections import OrderedDict
    # Strip any DataParallel 'module.' prefix from checkpoint keys.
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        head = k[:7]
        if head == 'module.':
            name = k[7:]  # remove `module.`
        else:
            name = k
        new_state_dict[name] = v
    net.load_state_dict(new_state_dict)
    net.eval()
    net = net.cuda()
    cudnn.benchmark = True
    print("Finished loading model")
    transform = BaseTransform(300, (104, 117, 123), (2, 0, 1))
    detector = Detect(21, 0, VOC_300)
    priorbox = PriorBox(VOC_300)
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()
    # Run detection on every image in the folder.
    Image = os.listdir('image/')
    for img_name in Image:
        img = cv2.imread("image/" + img_name)
        test_net(net, img, img_name, detector, transform, priors,
                 top_k=200, thresh=0.4)
from layers.bbox_utils import match, match_ssd, decode import matplotlib.pyplot as plt dataset = WIDERDetection(cfg.TRAIN_FILE, transform=S3FDValTransform(cfg.INPUT_SIZE), train=False) data_loader = data.DataLoader(dataset, 64, num_workers=4, shuffle=False, collate_fn=detection_collate, pin_memory=True) anchor_boxes = PriorBox(cfg).forward() num_priors = anchor_boxes.size(0) variance = cfg.VARIANCE savepath = 'tmp' if not os.path.exists(savepath): os.makedirs(savepath) filename = os.path.join(savepath, 'match_anchor.pkl') def anchor_match_count(): anchor_scale_map = {16: 0, 32: 0, 64: 0, 128: 0, 256: 0, 512: 0} thresh = cfg.OVERLAP_THRESH sfd_scales = [] for idx, (_, target) in enumerate(data_loader):
if args.ngpu > 1: net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu))) if args.cuda: net.cuda() cudnn.benchmark = True optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) #optimizer = optim.RMSprop(net.parameters(), lr=args.lr,alpha = 0.9, eps=1e-08, # momentum=args.momentum, weight_decay=args.weight_decay) criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False) priorbox = PriorBox(cfg) priors = Variable(priorbox.forward(), volatile=True) def train(): net.train() # loss counters loc_loss = 0 # epoch conf_loss = 0 epoch = 0 + args.resume_epoch print('Loading Dataset...') if args.dataset == 'VOC': dataset = VOCDetection(VOCroot, train_sets, preproc( img_dim, rgb_means, p), AnnotationTransform()) elif args.dataset == 'COCO':
def main():
    """Run RefineDet / DualRefineDet detection over an image-list file.

    For each listed image: run the net, decode detections, optionally write
    results (COCO json or plain-text lines), optionally display and save
    annotated images.

    NOTE(review): relies on module-level globals (deform, multihead, bn,
    refine, ssd_dim, num_classes, trained_model, device, top_k,
    confidence_threshold, nms_threshold, cfg, img_set, img_root, dataset,
    backbone, save_dir, display, det_list, results_file, labelmap,
    class_name) defined elsewhere in the file.
    """
    mean = (104, 117, 123)
    print('loading model!')
    # Choose the deformable or plain RefineDet builder.
    if deform:
        from model.dualrefinedet_vggbn import build_net
        net = build_net('test', size=ssd_dim, num_classes=num_classes,
                        c7_channel=1024, def_groups=deform,
                        multihead=multihead, bn=bn)
    else:
        from model.refinedet_vgg import build_net
        net = build_net('test', size=ssd_dim, num_classes=num_classes,
                        use_refine=refine, c7_channel=1024, bn=bn)
    net.load_state_dict(torch.load(trained_model))
    net.eval()
    print('Finished loading model!', trained_model)
    net = net.to(device)
    detector = Detect(num_classes, 0, top_k, confidence_threshold,
                      nms_threshold)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    for i, line in enumerate(open(img_set, 'r')):
        # if i==10:
        #     break
        # Parse image name / id depending on the dataset flavour.
        if 'COCO' in dataset:
            image_name = line[:-1]
            image_id = int(image_name.split('_')[-1])
        elif 'VOC' in dataset:
            image_name = line[:-1]
            image_id = -1
        else:
            image_name, image_id = line.split(' ')
            image_id = image_id[:-1]
        print(i, image_name, image_id)
        image_path = os.path.join(img_root, image_name + '.jpg')
        image = cv2.imread(image_path, 1)
        h, w, _ = image.shape
        image_draw = cv2.resize(image.copy(), (640, 480))
        im_trans = base_transform(image, ssd_dim, mean)
        ######################## Detection ########################
        with torch.no_grad():
            x = torch.from_numpy(im_trans).unsqueeze(0).permute(
                0, 3, 1, 2).to(device)
            # RefineDet additionally returns ARM (anchor refinement) outputs.
            if 'RefineDet' in backbone and refine:
                arm_loc, _, loc, conf = net(x)
            else:
                loc, conf = net(x)
                arm_loc = None
            detections = detector.forward(loc, conf, priors,
                                          arm_loc_data=arm_loc)
        ############################################################
        out = list()
        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            if dets.sum() == 0:
                continue
            # Keep rows whose score (column 0) is positive.
            mask = dets[:, 0].gt(0.).expand(dets.size(-1), dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, dets.size(-1))
            boxes = dets[:, 1:-1] if dets.size(-1) == 6 else dets[:, 1:]
            # Scale normalized boxes back to source-image pixels.
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            boxes_np = boxes.cpu().numpy()
            for b, s in zip(boxes_np, scores):
                if save_dir:
                    out.append(
                        [int(b[0]), int(b[1]), int(b[2]), int(b[3]), j - 1, s])
                    if 'COCO' in dataset:
                        # COCO json expects [x, y, width, height].
                        det_list.append({
                            'image_id': image_id,
                            'category_id': labelmap[j],
                            'bbox': [
                                float('{:.1f}'.format(b[0])),
                                float('{:.1f}'.format(b[1])),
                                float('{:.1f}'.format(b[2] - b[0] + 1)),
                                float('{:.1f}'.format(b[3] - b[1] + 1))
                            ],
                            'score': float('{:.2f}'.format(s))
                        })
                    else:
                        results_file.write(
                            str(image_id) + ' ' + str(j) + ' ' + str(s) +
                            ' ' + str(np.around(b[0], 2)) + ' ' +
                            str(np.around(b[1], 2)) + ' ' +
                            str(np.around(b[2], 2)) + ' ' +
                            str(np.around(b[3], 2)) + '\n')
                if display:
                    # Draw on the 640x480 preview copy.
                    cv2.rectangle(image_draw,
                                  (int(b[0] / w * 640), int(b[1] / h * 480)),
                                  (int(b[2] / w * 640), int(b[3] / h * 480)),
                                  (0, 255, 0), thickness=1)
                    cls = class_name[j] if 'COCO' in dataset else str(
                        labelmap[j - 1])
                    put_str = cls + ':' + str(np.around(s, decimals=2))
                    cv2.putText(
                        image_draw, put_str,
                        (int(b[0] / w * 640), int(b[1] / h * 480) - 10),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, color=(0, 255, 0),
                        thickness=1)
        if display:
            cv2.imshow('frame', image_draw)
            ch = cv2.waitKey(0)
            if ch == 115:  # 's' key: save detections and images
                if save_dir:
                    print('save: ', line)
                    torch.save(
                        out, os.path.join(save_dir, '%s.pkl' % str(line[:-1])))
                    cv2.imwrite(
                        os.path.join(save_dir, '%s.jpg' % str(line[:-1])),
                        image)
                    cv2.imwrite(
                        os.path.join(save_dir, '%s_box.jpg' % str(line[:-1])),
                        image_draw)
    cv2.destroyAllWindows()
    if save_dir:
        # NOTE(review): results_file is closed on the save_dir path only —
        # confirm it is always opened under the same condition.
        if dataset == 'COCO':
            json.dump(det_list, results_file)
        results_file.close()
def train():
    """Run the SSD training loop over the configured dataset.

    Relies on module-level globals: net, optimizer, criterion, args,
    train_sets, img_dim, rgb_means, p, batch_size, VOCroot, CUSTOMroot,
    VOC_300_2, VOC_512_3. Saves periodic checkpoints under weights/ and a
    final checkpoint under args.save_folder.
    """
    net.train()
    # loss counters, reset at the start of every epoch
    loc_loss = 0
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')
    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets,
                               preproc(img_dim, rgb_means, p),
                               AnnotationTransform())
    elif args.dataset == 'COCO':
        # dataset = COCODetection(COCOroot, train_sets, preproc(
        #     img_dim, rgb_means, p))
        print('COCO not supported now!')
        return
    elif args.dataset == 'CUSTOM':
        # two datasets so the input resolution could be switched per epoch
        dataset = CustomDetection(CUSTOMroot, train_sets,
                                  preproc(img_dim, rgb_means, p),
                                  CustomAnnotationTransform())
        dataset_512 = CustomDetection(CUSTOMroot, train_sets,
                                      preproc(512, rgb_means, p),
                                      CustomAnnotationTransform())
    else:
        print('Only VOC and COCO are supported now!')
        return
    epoch_size = len(dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size
    # LR decay milestones, in iterations  (80000,100000,120000)
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    step_index = 0
    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0
    lr = args.lr
    image_size = 0
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # new epoch: create batch iterator and matching priors
            image_size = ('300', '512')[1]  # [random.randint(0,1)]
            batch_iterator = iter(
                data.DataLoader((dataset, dataset_512)[image_size == '512'],
                                batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            priorbox = PriorBox((VOC_300_2, VOC_512_3)[image_size == '512'])
            # FIX: Variable(..., volatile=True) is deprecated (no-op on
            # PyTorch >= 0.4); torch.no_grad() is the supported equivalent
            # and is what the rest of this file already uses.
            with torch.no_grad():
                priors = priorbox.forward()
            loc_loss = 0
            conf_loss = 0
            # if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > 200):
            #     torch.save(net.state_dict(), args.save_folder + args.version + '_' + args.dataset + '_epoches_' +
            #                repr(epoch) + '.pth')
            epoch += 1
        if iteration in stepvalues:
            step_index += 1
            lr = adjust_learning_rate(optimizer, args.gamma, epoch,
                                      step_index, iteration, epoch_size)
        # load train data
        images, targets = next(batch_iterator)
        # print(np.sum([torch.sum(anno[:,-1] == 2) for anno in targets]))
        # FIX: dropped the deprecated Variable(..., volatile=True) wrappers —
        # DataLoader tensors never require grad, so plain .cuda() suffices.
        if args.cuda:
            images = images.cuda()
            targets = [anno.cuda() for anno in targets]
        # forward
        load_t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        load_t1 = time.time()
        # FIX: .data[0] raises IndexError on 0-dim loss tensors in
        # PyTorch >= 0.5; .item() is the supported scalar accessor.
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        if iteration % 100 == 0:
            print('Epoch:' + repr(epoch) + ' || image-size:' +
                  repr(image_size) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Totel iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f||' % (loss_l.item(), loss_c.item()) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))
        # Periodic checkpoints: every 10k iterations up to 110k, then every
        # 1k afterwards (merged from two duplicated save branches; the old
        # "iteration == 0 or" clause was redundant since 0 % 10000 == 0).
        save_every = 10000 if iteration <= 110000 else 1000
        if iteration % save_every == 0:
            print('Saving state, iter:', iteration)
            torch.save(net.state_dict(),
                       'weights/ssd300_2_VOC_' + repr(iteration) + '.pth')
    torch.save(
        net.state_dict(), args.save_folder + 'Final_' + args.version + '_' +
        args.dataset + '.pth')
parser = argparse.ArgumentParser() parser.add_argument("--prune_folder", default = "prunes/") parser.add_argument("--trained_model", default = "prunes/refineDet_trained.pth") parser.add_argument('--dataset_root', default=VOC_ROOT) parser.add_argument("--cut_ratio", default=0.2, type=float) parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda to train model') #for test_net: 200 in SSD paper, 200 for COCO, 300 for VOC parser.add_argument('--max_per_image', default=200, type=int, help='Top number of detections kept per image, further restrict the number of predictions to parse') args = parser.parse_args() cfg = voc320 # different from normal ssd, where the PriorBox is stored inside SSD object priorbox = PriorBox(cfg) priors = Variable(priorbox.forward().cuda(), volatile=True) # set the priors to cuda detector = RefineDetect(cfg['num_classes'], 0, cfg, object_score=0.01) def test_net(save_folder, net, detector, priors, cuda, testset, transform, max_per_image=200, thresh=0.05): # max_per_image is same as top_k if not os.path.exists(save_folder): os.mkdir(save_folder) num_images = len(testset) num_classes = len(labelmap) # +1 for background # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in range(num_images)]
size=ssd_dim, num_classes=num_classes, use_refine=args.refine, c7_channel=args.c7_channel) else: net = None print('loading model!', args.model_dir, args.iteration) net.load_state_dict(torch.load(trained_model)) print(net) net.eval() print('Finished loading model!', args.model_dir, args.iteration, 'tub=' + str(args.tub), 'tub_thresh=' + str(args.tub_thresh), 'tub_score=' + str(args.tub_generate_score)) detector = Detect(num_classes, 0, args.top_k, args.confidence_threshold, args.nms_threshold) priorbox = PriorBox(cfg) # priorbox=PriorBox(multi_cfg['2.2']) with torch.no_grad(): priors = priorbox.forward().to(device) # load data net = net.to(device) # evaluation test_net(args.save_folder, net, dataset, BaseTransform(net.size, dataset_mean), args.top_k, detector, priors) else: out_dir = get_output_dir( pkl_dir, args.iteration + '_' + args.dataset_name + '_' + args.set_file_name) print('Without detection', out_dir) do_python_eval(out_dir)
parser.add_argument('-c', '--config', default='configs/m2det320_vgg.py', type=str) parser.add_argument('-d', '--dataset', default='COCO', help='VOC or COCO version') parser.add_argument('-m', '--trained_model', default=None, type=str, help='Trained state_dict file path to open') parser.add_argument('--test', action='store_true', help='to submit a test file') args = parser.parse_args() print_info('----------------------------------------------------------------------\n' '| M2Det Evaluation Program |\n' '----------------------------------------------------------------------', ['yellow','bold']) global cfg cfg = Config.fromfile(args.config) if not os.path.exists(cfg.test_cfg.save_folder): os.mkdir(cfg.test_cfg.save_folder) anchor_config = anchors(cfg) print_info('The Anchor info: \n{}'.format(anchor_config)) priorbox = PriorBox(anchor_config) with torch.no_grad(): priors = priorbox.forward() if cfg.test_cfg.cuda: priors = priors.cuda() def test_net(save_folder, net, detector, cuda, testset, transform, max_per_image=300, thresh=0.005): if not os.path.exists(save_folder): os.mkdir(save_folder) num_images = len(testset) print_info('=> Total {} images to test.'.format(num_images),['yellow','bold']) num_classes = cfg.model.m2det_config.num_classes all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)] _t = {'im_detect': Timer(), 'misc': Timer()}
def test_net(save_folder, net, dataset, transform, top_k, detector, priors):
    """Test a Fast R-CNN network on an image database.

    Multi-scale, flip-augmented evaluation: runs `net` on every image at each
    scale listed in multi_scale[str(ssd_dim)], both as-is (loop==0) and
    horizontally flipped (loop==1), filters each scale's boxes by size,
    merges everything per class with bbox_vote, then pickles and evaluates.

    NOTE(review): `transform`, `top_k` and the incoming `priors` argument are
    unused here — priors are rebuilt per scale below, and the `priors` name is
    rebound inside the loop. Relies on module-level globals: args, ssd_dim,
    multi_scale, multi_cfg, dataset_mean, labelmap, pkl_dir, device,
    bbox_vote, evaluate_detections, base_transform, get_output_dir.
    """
    num_images = len(dataset)
    # all detections are collected into:score
    # all_boxes[cls][image] = N x 5 array of detections in
    # (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(len(labelmap) + 1)]
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    all_time = 0.
    output_dir = get_output_dir(
        pkl_dir,
        args.iteration + '_' + args.dataset_name + '_' + args.set_file_name)
    det_file = os.path.join(output_dir, 'detections.pkl')
    output_dir = get_output_dir(output_dir, 'multi_test')
    ######################### Multiscale PriorBox #####################
    # Pre-build one prior tensor per test scale, keyed by the scale's min_dim.
    priorboxes = {}
    for v1 in multi_scale[str(ssd_dim)]:
        if not multi_cfg[str(v1)]:
            # NOTE(review): returns an error *string* rather than raising —
            # a caller that ignores the return value silently skips testing.
            return ("not included this multi_scale")
        priorbox = PriorBox(multi_cfg[str(v1)])
        img_size = multi_cfg[str(v1)]['min_dim']
        with torch.no_grad():
            priorboxes[str(img_size)] = priorbox.forward().to(device)
    ########################## Detection ##############################
    for i in range(num_images):
        _t['im_detect'].tic()
        image = dataset.pull_image(i)
        h, w, _ = image.shape
        # key: '<ssd_dim>_<scale>_<flip>' -> cloned detections tensor
        detections_multi = {}
        for v in multi_scale[str(ssd_dim)]:
            priors = priorboxes[str(v)]
            ssd_dim_temp = int(v)
            for loop in range(2):  # loop==0: original image, loop==1: h-flip
                if (loop == 0):
                    im_trans = base_transform(image, ssd_dim_temp,
                                              dataset_mean)
                    im_trans = im_trans[:, :, (2, 1, 0)]  # reorder channels
                else:
                    im_f = image.copy()
                    im_f = cv2.flip(im_f, 1)  # horizontal flip
                    im_trans = base_transform(im_f, ssd_dim_temp,
                                              dataset_mean)
                    im_trans = im_trans[:, :, (2, 1, 0)]
                with torch.no_grad():
                    x = torch.from_numpy(im_trans).unsqueeze(0).permute(
                        0, 3, 1, 2).to(device)
                    if 'RefineDet' in args.backbone and args.refine:
                        arm_loc, _, loc, conf = net(x)
                        detections = detector.forward(loc, conf, priors,
                                                      arm_loc_data=arm_loc)
                        detections_multi[str(ssd_dim) + '_' + str(v) + '_' +
                                         str(loop)] = detections.clone()
                    else:
                        loc, conf = net(x)
                        arm_loc = None
                        detections = detector.forward(loc, conf, priors,
                                                      arm_loc_data=arm_loc)
                        detections_multi[str(ssd_dim) + '_' + str(v) + '_' +
                                         str(loop)] = detections.clone()
        detect_time = _t['im_detect'].toc(average=False)
        if i > 10:
            # exclude the first images from timing (treated as warm-up)
            all_time += detect_time
        ###################################################################
        for j in range(1, detections.size(1)):  # class 0 is background
            cls_dets = np.array([])
            for k, d in detections_multi.items():
                dets = d[0, j, :]
                if dets.sum() == 0:
                    continue
                # keep rows whose score (column 0) is positive
                mask = dets[:, 0].gt(0.).expand(dets.size(-1),
                                                dets.size(0)).t()
                dets = torch.masked_select(dets, mask).view(-1, dets.size(-1))
                # 6-column rows carry a trailing extra field; boxes cols 1..4
                boxes = dets[:, 1:-1] if dets.size(-1) == 6 else dets[:, 1:]
                if (k[-1] == '1'):
                    # undo the horizontal flip: mirror x-coords, swap x1/x2
                    boxes[:, 0] = 1 - boxes[:, 0]
                    boxes[:, 2] = 1 - boxes[:, 2]
                    temp_swap = boxes[:, 0].clone()
                    boxes[:, 0] = boxes[:, 2]
                    boxes[:, 2] = temp_swap
                # scale normalized coordinates to pixels
                boxes[:, 0] *= w
                boxes[:, 2] *= w
                boxes[:, 1] *= h
                boxes[:, 3] *= h
                # Per-scale size gating: small test scales keep only large
                # boxes, large test scales keep only small boxes.
                if k in ['320_192_0', '320_192_1', '512_320_0', '512_320_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.maximum(boxes_np[:, 2] - boxes_np[:, 0] + 1,
                                   boxes_np[:, 3] - boxes_np[:, 1] + 1) >
                        32)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_320_0', '320_320_1', '512_512_0',
                           '512_512_1']:
                    # native scale: > 0 keeps everything with positive size
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.maximum(boxes_np[:, 2] - boxes_np[:, 0] + 1,
                                   boxes_np[:, 3] - boxes_np[:, 1] + 1) >
                        0)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_384_0', '320_384_1', '512_640_0',
                           '512_640_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] + 1,
                                   boxes_np[:, 3] - boxes_np[:, 1] + 1) <
                        160)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_448_0', '320_448_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] + 1,
                                   boxes_np[:, 3] - boxes_np[:, 1] + 1) <
                        128)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_512_0', '320_512_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] + 1,
                                   boxes_np[:, 3] - boxes_np[:, 1] + 1) <
                        96)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_576_0', '320_576_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] + 1,
                                   boxes_np[:, 3] - boxes_np[:, 1] + 1) <
                        64)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in [
                        '320_706_0', '320_706_1', '512_1216_0', '512_1216_1'
                ]:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] + 1,
                                   boxes_np[:, 3] - boxes_np[:, 1] + 1) <
                        32)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                # NOTE(review): if k matches none of the branches above,
                # index_temp (and this check) reuses the value from a previous
                # loop iteration — confirm every valid key is covered.
                if (index_temp.size == 0):
                    continue
                # scores selected with the same index as the kept boxes
                scores = dets[index_temp, 0].cpu().numpy()
                cls_dets_temp = np.hstack((boxes.cpu().numpy(), scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                if (cls_dets.size == 0):
                    cls_dets = cls_dets_temp.copy()
                else:
                    cls_dets = np.concatenate((cls_dets, cls_dets_temp),
                                              axis=0)
            if (cls_dets.size != 0):
                # merge overlapping detections across scales/flips by voting
                cls_dets = bbox_vote(cls_dets)
            if (len(cls_dets) != 0):
                all_boxes[j][i] = cls_dets
        print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_images,
                                                    detect_time))
    # NOTE(review): assumes num_images > 10 and matches the i > 10 warm-up
    # exclusion above; smaller test sets would divide by zero or skew FPS.
    FPS = (num_images - 10) / all_time
    print('FPS:', FPS)
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    print('Evaluating detections')
    evaluate_detections(all_boxes, output_dir, dataset, FPS=FPS)
help='Dir to save results') parser.add_argument('-m', '--trained_model', default='weights/epoches_260.pth', type=str, help='Trained state_dict file path to open') parser.add_argument('--cuda', default=True, type=bool, help='Use cuda to train model') parser.add_argument('--cpu', default=False, type=bool, help='Use cpu nms') args = parser.parse_args() cfg = VOC_Config img_dim = 300 num_classes = 2 rgb_means = (104, 117, 123) priorbox = PriorBox(cfg) with torch.no_grad(): priors = priorbox.forward() if args.cuda: priors = priors.cuda() class ObjectDetector: def __init__(self, net, detection, transform, num_classes=2, thresh=0.1, cuda=True): self.net = net self.detection = detection self.transform = transform self.num_classes = num_classes self.thresh = thresh self.cuda = cuda
def __init__(self):
    """Build the training/evaluation harness from the module-level `cfg`:
    data loaders, model, priors, detector, optimizer, LR scheduler, loss,
    output/log directories and the TensorBoard writer.
    """
    self.cfg = cfg
    # Load data
    print('===> Loading data')
    # one loader per phase listed in cfg.phase; None when a phase is absent
    self.train_loader = load_data(cfg.dataset,
                                  'train') if 'train' in cfg.phase else None
    self.eval_loader = load_data(cfg.dataset,
                                 'eval') if 'eval' in cfg.phase else None
    self.test_loader = load_data(cfg.dataset,
                                 'test') if 'test' in cfg.phase else None
    # self.visualize_loader = load_data(cfg.DATASET, 'visualize') if 'visualize' in cfg.PHASE else None
    # Build model
    print('===> Building model')
    self.base_trans = BaseTransform(cfg.image_size[0], cfg.network.rgb_means,
                                    cfg.network.rgb_std, (2, 0, 1))
    self.priors = PriorBox(cfg.anchor)
    # NOTE(review): eval() on a config-supplied string — acceptable only if
    # cfg.model is trusted (it is config, not user input); confirm.
    self.model = eval(cfg.model + '.build_net')(cfg.image_size[0],
                                                cfg.dataset.num_classes)
    with torch.no_grad():
        # replace the PriorBox module with its generated prior tensor
        self.priors = self.priors.forward()
    self.detector = Detect2(cfg.post_process)
    # Utilize GPUs for computation
    self.use_gpu = torch.cuda.is_available()
    if cfg.train.train_scope == '':
        trainable_param = self.model.parameters()
    else:
        # restrict optimization to the configured sub-scope of the model
        trainable_param = self.trainable_param(cfg.train.train_scope)
    self.output_dir = os.path.join(cfg.output_dir, cfg.name, cfg.date)
    if not os.path.exists(self.output_dir):
        os.makedirs(self.output_dir)
    self.log_dir = os.path.join(self.output_dir, 'logs')
    if not os.path.exists(self.log_dir):
        os.makedirs(self.log_dir)
    self.checkpoint = cfg.train.checkpoint
    previous = self.find_previous()
    # NOTE(review): this unconditionally discards find_previous(), so the
    # resume branch below is dead code — looks like a debugging leftover;
    # confirm whether checkpoint resume should be re-enabled.
    previous = False
    if previous:
        self.start_epoch = previous[0][-1]
        self.resume_checkpoint(previous[1][-1])
    else:
        self.start_epoch = self.initialize()
    if self.use_gpu:
        print('Utilize GPUs for computation')
        print('Number of GPU available', torch.cuda.device_count())
        self.model.cuda()
        # NOTE(review): self.priors is a tensor here (from forward() above);
        # Tensor.cuda() is NOT in-place and its return value is discarded, so
        # priors likely stay on CPU — probably should be
        # self.priors = self.priors.cuda(); confirm.
        self.priors.cuda()
        cudnn.benchmark = True
        if cfg.ngpu > 1:
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=list(
                                                   range(cfg.ngpu)))
    # Print the model architecture and parameters
    #print('Model architectures:\n{}\n'.format(self.model))
    #print('Parameters and size:')
    #for name, param in self.model.named_parameters():
    #    print('{}: {}'.format(name, list(param.size())))
    # print trainable scope
    print('Trainable scope: {}'.format(cfg.train.train_scope))
    self.optimizer = self.configure_optimizer(trainable_param,
                                              cfg.train.optimizer)
    self.exp_lr_scheduler = self.configure_lr_scheduler(
        self.optimizer, cfg.train.lr_scheduler)
    self.max_epochs = cfg.train.lr_scheduler.max_epochs
    # metric
    if cfg.network.multi_box_loss_type == 'origin':
        self.criterion = MultiBoxLoss2(cfg.matcher, self.priors, self.use_gpu)
    else:
        # NOTE(review): error message reads cfg.multi_box_loss_type but the
        # condition checks cfg.network.multi_box_loss_type — the attribute in
        # the message may not exist and would raise here; confirm.
        print('ERROR: ' + cfg.multi_box_loss_type + ' is not supported')
        sys.exit()
    # Set the logger
    self.writer = SummaryWriter(log_dir=self.log_dir)
    self.checkpoint_prefix = cfg.name + '_' + cfg.dataset.dataset