Example #1
0
    def get_model(cls):
        """Construct the M2Det network and return everything needed for
        inference.

        Loads the weights baked into the container image, moves the model
        to GPU when the config asks for it, and pre-computes the anchor
        priors.

        Returns:
            tuple: ``(net, priors, preprocess, detector)``.
        """
        trained_model = '/opt/ml/model/m2det512_vgg.pth'

        anchor_cfg = anchors(cfg)
        print_info('The Anchor info: \n{}'.format(anchor_cfg))
        prior_box = PriorBox(anchor_cfg)
        model = build_net('test',
                          size=cfg.model.input_size,
                          config=cfg.model.m2det_config)
        init_net(model, cfg, trained_model)
        print_info('===> Finished constructing and loading model',
                   ['yellow', 'bold'])
        model.eval()
        with torch.no_grad():
            box_priors = prior_box.forward()
            if cfg.test_cfg.cuda:
                model = model.cuda()
                box_priors = box_priors.cuda()
                cudnn.benchmark = True
            else:
                model = model.cpu()
        preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means,
                                   (2, 0, 1))
        box_decoder = Detect(cfg.model.m2det_config.num_classes,
                             cfg.loss.bkg_label, anchor_cfg)

        return model, box_priors, preprocess, box_decoder
Example #2
0
class EfficientDet(nn.Module):
    """EfficientDet-style detector: EfficientNet backbone + BiFPN neck and
    per-level classification / box-regression heads.

    Args:
        num_class: number of object classes; 21 selects the VOC anchor
            config, anything else selects COCO.
        levels: how many times the BiFPN is applied in ``forward``.
        num_channels: channel width used throughout the BiFPN.
        model_name: pretrained EfficientNet variant to load.
    """

    def __init__(self,
                num_class = 21,
                levels = 3,
                num_channels = 128,
                model_name = 'efficientnet-b0'):
        super(EfficientDet, self).__init__()
        self.num_class = num_class
        self.levels = levels
        self.num_channels = num_channels
        self.efficientnet = EfficientNet.from_pretrained(model_name)
        print('efficientnet: ', self.efficientnet)
        self.bifpn = BiFPN(num_channels = self.num_channels)

        # VOC config when num_class == 21, COCO otherwise.
        self.cfg = (coco, voc)[num_class == 21]
        self.priorbox = PriorBox(self.cfg)
        # fix: Variable(..., volatile=True) was removed in PyTorch 0.4;
        # build the priors under no_grad instead.
        with torch.no_grad():
            self.priors = self.priorbox.forward()

    def forward(self, inputs):
        """Run detection on a batch.

        Returns:
            tuple: ``(loc, conf, priors)`` where loc is (N, -1, 4) and conf
            is (N, -1, num_class).

        NOTE(review): the 1x1 projection convs below are constructed inside
        ``forward`` with fresh random weights on every call, so they are
        never trained — they should be created once in ``__init__``.
        Preserved as-is to keep behavior identical.
        """
        P1, P2, P3, P4, P5, P6, P7 = self.efficientnet(inputs)
        P3 = self.bifpn.Conv(in_channels=P3.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P3)
        P4 = self.bifpn.Conv(in_channels=P4.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P4)
        P5 = self.bifpn.Conv(in_channels=P5.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P5)
        P6 = self.bifpn.Conv(in_channels=P6.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P6)
        P7 = self.bifpn.Conv(in_channels=P7.size(1), out_channels=self.num_channels, kernel_size=1, stride=1, padding=0)(P7)
        for _ in range(self.levels):
            P3, P4, P5, P6, P7 = self.bifpn([P3, P4, P5, P6, P7])
        P = [P3, P4, P5, P6, P7]

        # `dim` is the canonical torch keyword (`axis` is only an alias).
        features_class = [self.class_net(p, self.num_class) for p in P]
        features_class = torch.cat(features_class, dim=0)
        features_bbox = [self.regression_net(p) for p in P]
        features_bbox = torch.cat(features_bbox, dim=0)
        output = (
                features_bbox.view(inputs.size(0), -1, 4),
                features_class.view(inputs.size(0), -1, self.num_class),
                self.priors
            )
        return output

    @staticmethod
    def class_net(features, num_class, num_anchor=5):
        """Per-level classification head.

        NOTE(review): built inline with random weights on every call — see
        ``forward``.
        """
        features = nn.Sequential(
            nn.Conv2d(in_channels=features.size(1), out_channels=features.size(2), kernel_size = 3, stride=1),
            nn.Conv2d(in_channels=features.size(2), out_channels=num_anchor*num_class, kernel_size = 3, stride=1)
        )(features)
        features = features.view(-1, num_class)
        features = nn.Sigmoid()(features)
        return features

    @staticmethod
    def regression_net(features, num_anchor=5):
        """Per-level box-regression head (4 coords per anchor).

        NOTE(review): built inline with random weights on every call — see
        ``forward``.
        """
        features = nn.Sequential(
            nn.Conv2d(in_channels=features.size(1), out_channels=features.size(2), kernel_size = 3, stride=1),
            nn.Conv2d(in_channels=features.size(2), out_channels=num_anchor*4, kernel_size = 3, stride=1)
        )(features)
        features = features.view(-1, 4)
        features = nn.Sigmoid()(features)
        return features
class Pelee_Det(object):
    """Pelee-based detector wrapper.

    Builds the network, priors and post-processing once in the constructor
    and exposes :meth:`detect` for single-image inference. Relies on the
    module-level ``cfg``, ``args`` and ``ch_labels``; the constructor moves
    net and priors to GPU unconditionally, so CUDA is required.
    """

    def __init__(self):
        self.anchor_config = anchors(cfg.model)
        self.priorbox = PriorBox(self.anchor_config)
        self.net = build_net('test', cfg.model.input_size, cfg.model)
        init_net(self.net, cfg, args.trained_model)
        self.net.eval()

        self.num_classes = cfg.model.num_classes

        with torch.no_grad():
            self.priors = self.priorbox.forward()
            self.net = self.net.cuda()
            self.priors = self.priors.cuda()
            cudnn.benchmark = True
        self._preprocess = BaseTransform(cfg.model.input_size,
                                         cfg.model.rgb_means, (2, 0, 1))
        # fix: `num_classes` was an undefined bare name here (NameError);
        # use the attribute assigned above.
        self.detector = Detect(self.num_classes, cfg.loss.bkg_label,
                               self.anchor_config)

    def detect(self, image):
        """Run detection on one image array.

        Args:
            image: image as an HxWxC array (as produced by cv2).

        Returns:
            tuple: ``(infos, im2show)`` as returned by ``draw_detection``.
        """
        loop_start = time.time()
        w, h = image.shape[1], image.shape[0]
        img = self._preprocess(image).unsqueeze(0)
        if cfg.test_cfg.cuda:
            img = img.cuda()
        scale = torch.Tensor([w, h, w, h])
        out = self.net(img)
        boxes, scores = self.detector.forward(out, self.priors)
        boxes = (boxes[0] * scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []
        # Class 0 is background; per-class threshold + NMS, keeping the
        # top detections for each class.
        for j in range(1, len(ch_labels)):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([_.tolist() + [j] for _ in c_dets])

        loop_time = time.time() - loop_start
        # fix: reshape so the column slices below also work when nothing
        # was detected (a bare np.array([]) would raise IndexError).
        allboxes = np.array(allboxes).reshape(-1, 6)
        boxes = allboxes[:, :4]
        scores = allboxes[:, 4]
        cls_inds = allboxes[:, 5]
        infos, im2show = draw_detection(image, boxes, scores, cls_inds, -1,
                                        args.thresh)
        return infos, im2show
Example #4
0
    def __init__(self, img_size=300, thresh=0.56):
        assert img_size == 300 or img_size == 512, 'net input image size must be 300 or 512'
        self.labels_name = LABELS_SET
        self.labels_numb = len(LABELS_SET)
        self.img_size = img_size
        self.cfg = VOC_300 if img_size == 300 else VOC_512
        self.thresh = thresh
        self.gpu_is_available = torch.cuda.is_available()
        self.gpu_numb = torch.cuda.device_count()
        self.net = build_net('test', self.img_size, self.labels_numb)
        self.detect = Detect(self.labels_numb, 0, self.cfg)
        self.transform = BaseTransform(self.img_size)

        # load net weights
        state_dict = torch.load(trained_model, map_location='cpu')
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        self.net.load_state_dict(new_state_dict)
        self.net.eval()
        print('Finished loading model!')

        if self.gpu_numb > 1:
            self.net = torch.nn.DataParallel(self.net,
                                             device_ids=list(
                                                 range(self.gpu_numb)))

        # set net gpu or cpu model
        if self.gpu_is_available:
            self.net.cuda()
            cudnn.benchmark = True

        # define box generator
        priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = priorbox.forward()
            if self.gpu_is_available:
                self.priors = self.priors.cuda()
def im_detect(net, im_org, target_size, transform, cuda, means):
    """Run single-image detection with `net` and decode via the module-level
    ``detector``.

    Args:
        net: detection network (must expose ``sizes`` per feature map).
        im_org: original image (HxWxC array).
        target_size: square input resolution the image is resized to.
        transform: unused here; kept for interface compatibility.
        cuda: move tensors to GPU when True.
        means: per-channel means subtracted from the resized image.

    Returns:
        tuple: ``(boxes, scores)`` as numpy arrays, boxes scaled back to the
        original image size.
    """
    im = cv2.resize(np.array(im_org), (target_size, target_size),
                    interpolation=cv2.INTER_LINEAR).astype(np.float32)
    im -= means
    im = im.transpose((2, 0, 1))  # HWC -> CHW for torch
    scale = torch.Tensor(
        [im_org.shape[1], im_org.shape[0], im_org.shape[1], im_org.shape[0]])

    # torch.no_grad() replaces the removed Variable(..., volatile=True).
    with torch.no_grad():
        x = torch.from_numpy(im).unsqueeze(0)
        if cuda:
            x = x.cuda()
            scale = scale.cuda()

        out = net(x)

        cfg_temp = VOC_512
        # NOTE(review): the mutations below write into the global `cfg`
        # while PriorBox is built from `cfg_temp` — presumably the two alias
        # the same dict; verify against the module-level setup.
        cfg['min_dim'] = target_size
        size = math.ceil(target_size / 4)
        multi = target_size / 300
        for i in range(0, len(cfg['feature_maps'])):
            size = net.sizes[i]
            cfg['feature_maps'][i] = size
        priorbox_temp = PriorBox(cfg_temp)
        priors_temp = priorbox_temp.forward()
        # fix: previously `.cuda()` was called unconditionally, crashing on
        # CPU-only machines even though a `cuda` flag is passed in.
        if cuda:
            priors_temp = priors_temp.cuda()

        boxes, scores = detector.forward(out, priors_temp)
    boxes = boxes[0]
    scores = scores[0]

    # Rescale normalized box coords back to the original image size.
    boxes *= scale
    boxes = boxes.cpu().numpy()
    scores = scores.cpu().numpy()

    return (boxes, scores)
Example #6
0
    def _init_model(self):
        """Build the RFB-VGG detector from ``self.model_path``.

        Chooses the 300 or 512 config from the checkpoint filename, loads
        the weights (stripping any DataParallel ``module.`` prefix),
        pre-computes the priors and creates the ``Detect`` post-processor.
        """
        # fix: `cuda` was only bound inside an `if torch.cuda.is_available()`
        # branch, raising UnboundLocalError on CPU-only machines.
        cuda = torch.cuda.is_available()
        if '300' in self.model_path:
            cfg = COCO_300
            self.img_dim = 300
            print('Model input size is 300')
        else:
            cfg = COCO_512
            self.img_dim = 512
            print('Model input size is 512')

        priorbox = PriorBox(cfg)
        with torch.no_grad():
            priors = priorbox.forward()
            # fix: `self.priors` was left unset on the CPU path.
            self.priors = priors.cuda() if cuda else priors

        self.net = build_rfb_vgg_net('test', self.img_dim, self.num_classes)  # initialize detector
        state_dict = torch.load(self.model_path)['state_dict']
        # create new OrderedDict that does not contain `module.`
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        self.net.load_state_dict(new_state_dict)
        self.net.eval()
        if cuda:
            self.net = self.net.cuda()
            cudnn.benchmark = True
        else:
            self.net = self.net.cpu()
        print('Finished loading model!')
        self.detector = Detect(self.num_classes, 0, cfg)
Example #7
0
    def Train(self,
              epochs=200,
              log_iters=True,
              output_weights_dir="weights",
              saved_epoch_interval=10):
        """Train an RFB-Net variant on the configured COCO-format dataset.

        Reads all remaining settings from ``self.system_dict`` (version,
        size, batch size, lr, resume checkpoint, dataset paths, ...),
        builds the network and optimizer, then runs the SGD training loop,
        saving an intermediate checkpoint every epoch and a final one at
        the end.

        Args:
            epochs: total number of training epochs (``max_epoch``).
            log_iters: stored into params; logging flag.
            output_weights_dir: folder checkpoints are written to.
            saved_epoch_interval: print progress every this many iterations.
        """
        self.system_dict["params"]["max_epoch"] = epochs
        self.system_dict["params"]["log_iters"] = log_iters
        self.system_dict["params"]["save_folder"] = output_weights_dir

        if not os.path.exists(self.system_dict["params"]["save_folder"]):
            os.mkdir(self.system_dict["params"]["save_folder"])

        # Anchor/config selection by input size; RFB_mobile overrides below.
        if (self.system_dict["params"]["size"] == 300):
            cfg = COCO_300
        else:
            cfg = COCO_512

        if self.system_dict["params"]["version"] == 'RFB_vgg':
            from models.RFB_Net_vgg import build_net
        elif self.system_dict["params"]["version"] == 'RFB_E_vgg':
            from models.RFB_Net_E_vgg import build_net
        elif self.system_dict["params"]["version"] == 'RFB_mobile':
            from models.RFB_Net_mobile import build_net
            cfg = COCO_mobile_300
        else:
            print('Unkown version!')

        # Per-version input size, channel means and expansion probability.
        img_dim = (300, 512)[self.system_dict["params"]["size"] == 512]
        rgb_means = ((104, 117, 123), (
            103.94, 116.78,
            123.68))[self.system_dict["params"]["version"] == 'RFB_mobile']
        p = (0.6, 0.2)[self.system_dict["params"]["version"] == 'RFB_mobile']

        # NOTE(review): file handle is never closed, and the trailing-line
        # check compares against "" rather than a stripped line — presumably
        # classes.txt ends without a newline; verify against the data files.
        f = open(
            self.system_dict["dataset"]["train"]["root_dir"] + "/" +
            self.system_dict["dataset"]["train"]["coco_dir"] +
            "/annotations/classes.txt", 'r')
        lines = f.readlines()
        if (lines[-1] == ""):
            num_classes = len(lines) - 1
        else:
            num_classes = len(lines) + 1

        batch_size = self.system_dict["params"]["batch_size"]
        weight_decay = self.system_dict["params"]["weight_decay"]
        gamma = self.system_dict["params"]["gamma"]
        momentum = self.system_dict["params"]["momentum"]

        self.system_dict["local"]["net"] = build_net('train', img_dim,
                                                     num_classes)

        if self.system_dict["params"]["resume_net"] == None:
            # Fresh start: load the backbone weights, then re-init the heads.
            base_weights = torch.load(self.system_dict["params"]["basenet"])
            print('Loading base network...')
            self.system_dict["local"]["net"].base.load_state_dict(base_weights)

            # NOTE(review): `xavier` is defined but never used below, and
            # init.xavier_uniform is deprecated in favor of xavier_uniform_.
            def xavier(param):
                init.xavier_uniform(param)

            # Kaiming init for conv weights; BN weights -> 1, biases -> 0.
            def weights_init(m):
                for key in m.state_dict():
                    if key.split('.')[-1] == 'weight':
                        if 'conv' in key:
                            init.kaiming_normal_(m.state_dict()[key],
                                                 mode='fan_out')
                        if 'bn' in key:
                            m.state_dict()[key][...] = 1
                    elif key.split('.')[-1] == 'bias':
                        m.state_dict()[key][...] = 0

            print('Initializing weights...')
            # initialize newly added layers' weights with kaiming_normal method
            self.system_dict["local"]["net"].extras.apply(weights_init)
            self.system_dict["local"]["net"].loc.apply(weights_init)
            self.system_dict["local"]["net"].conf.apply(weights_init)
            self.system_dict["local"]["net"].Norm.apply(weights_init)
            if self.system_dict["params"]["version"] == 'RFB_E_vgg':
                self.system_dict["local"]["net"].reduce.apply(weights_init)
                self.system_dict["local"]["net"].up_reduce.apply(weights_init)

        else:
            # load resume network
            print('Loading resume network...')
            state_dict = torch.load(self.system_dict["params"]["resume_net"])
            # create new OrderedDict that does not contain `module.`
            from collections import OrderedDict
            new_state_dict = OrderedDict()
            for k, v in state_dict.items():
                head = k[:7]
                if head == 'module.':
                    name = k[7:]  # remove `module.`
                else:
                    name = k
                new_state_dict[name] = v
            self.system_dict["local"]["net"].load_state_dict(new_state_dict)

        if self.system_dict["params"]["ngpu"] > 1:
            self.system_dict["local"]["net"] = torch.nn.DataParallel(
                self.system_dict["local"]["net"],
                device_ids=list(range(self.system_dict["params"]["ngpu"])))

        if self.system_dict["params"]["cuda"]:
            self.system_dict["local"]["net"].cuda()
            cudnn.benchmark = True

        optimizer = optim.SGD(
            self.system_dict["local"]["net"].parameters(),
            lr=self.system_dict["params"]["lr"],
            momentum=self.system_dict["params"]["momentum"],
            weight_decay=self.system_dict["params"]["weight_decay"])
        #optimizer = optim.RMSprop(self.system_dict["local"]["net"].parameters(), lr=self.system_dict["params"]["lr"], alpha = 0.9, eps=1e-08,
        #                      momentum=self.system_dict["params"]["momentum"], weight_decay=self.system_dict["params"]["weight_decay"])

        criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5,
                                 False)
        # Pre-compute the anchor priors once for the loss.
        priorbox = PriorBox(cfg)
        with torch.no_grad():
            priors = priorbox.forward()
            if self.system_dict["params"]["cuda"]:
                priors = priors.cuda()

        self.system_dict["local"]["net"].train()
        # loss counters
        loc_loss = 0  # epoch
        conf_loss = 0
        epoch = 0 + self.system_dict["params"]["resume_epoch"]
        print('Loading Dataset...')

        # Drop any stale COCO annotation cache before rebuilding the dataset.
        if (os.path.isdir("coco_cache")):
            os.system("rm -r coco_cache")

        dataset = COCODetection(
            self.system_dict["dataset"]["train"]["root_dir"],
            self.system_dict["dataset"]["train"]["coco_dir"],
            self.system_dict["dataset"]["train"]["set_dir"],
            preproc(img_dim, rgb_means, p))

        epoch_size = len(dataset) // self.system_dict["params"]["batch_size"]
        max_iter = self.system_dict["params"]["max_epoch"] * epoch_size

        # LR decay milestones, expressed in iterations.
        stepvalues = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
        print('Training', self.system_dict["params"]["version"], 'on',
              dataset.name)
        step_index = 0

        if self.system_dict["params"]["resume_epoch"] > 0:
            start_iter = self.system_dict["params"]["resume_epoch"] * epoch_size
        else:
            start_iter = 0

        lr = self.system_dict["params"]["lr"]

        for iteration in range(start_iter, max_iter):
            if iteration % epoch_size == 0:
                # create batch iterator
                batch_iterator = iter(
                    data.DataLoader(
                        dataset,
                        batch_size,
                        shuffle=True,
                        num_workers=self.system_dict["params"]["num_workers"],
                        collate_fn=detection_collate))
                loc_loss = 0
                conf_loss = 0

                # One rolling "intermediate" checkpoint per epoch.
                torch.save(
                    self.system_dict["local"]["net"].state_dict(),
                    self.system_dict["params"]["save_folder"] + "/" +
                    self.system_dict["params"]["version"] + '_' +
                    self.system_dict["params"]["dataset"] + '_epoches_' +
                    'intermediate' + '.pth')
                epoch += 1

            load_t0 = time.time()
            if iteration in stepvalues:
                step_index += 1
            lr = self.adjust_learning_rate(optimizer,
                                           self.system_dict["params"]["gamma"],
                                           epoch, step_index, iteration,
                                           epoch_size)

            # load train data
            images, targets = next(batch_iterator)

            #print(np.sum([torch.sum(anno[:,-1] == 2) for anno in targets]))

            if self.system_dict["params"]["cuda"]:
                images = Variable(images.cuda())
                targets = [Variable(anno.cuda()) for anno in targets]
            else:
                images = Variable(images)
                targets = [Variable(anno) for anno in targets]
            # forward
            t0 = time.time()
            out = self.system_dict["local"]["net"](images)
            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, priors, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            t1 = time.time()
            loc_loss += loss_l.item()
            conf_loss += loss_c.item()
            load_t1 = time.time()
            if iteration % saved_epoch_interval == 0:
                print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                      repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                      '|| Current iter ' + repr(iteration) + '|| Total iter ' +
                      repr(max_iter) + ' || L: %.4f C: %.4f||' %
                      (loss_l.item(), loss_c.item()) +
                      'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                      'LR: %.8f' % (lr))

        # Final checkpoint after the full schedule.
        torch.save(
            self.system_dict["local"]["net"].state_dict(),
            self.system_dict["params"]["save_folder"] + "/" + 'Final_' +
            self.system_dict["params"]["version"] + '_' +
            self.system_dict["params"]["dataset"] + '.pth')
def train():
    """Top-level training loop for the multi-input-size SSD variant.

    Relies on module-level globals: ``net``, ``args``, ``optimizer``,
    ``criterion``, ``img_dim``, ``rgb_means``, ``p``, ``train_sets`` and the
    dataset roots. Saves periodic checkpoints under ``weights/`` and a final
    checkpoint under ``args.save_folder``.
    """
    net.train()
    # loss counters
    loc_loss = 0  # accumulated localization loss for the current epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')

    # fix: `dataset_512` was only bound in the CUSTOM branch but referenced
    # unconditionally in the loop below, raising UnboundLocalError for VOC.
    dataset_512 = None
    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets,
                               preproc(img_dim, rgb_means, p),
                               AnnotationTransform())
    elif args.dataset == 'COCO':
        print('COCO not supported now!')
        return
    elif args.dataset == 'CUSTOM':
        dataset = CustomDetection(CUSTOMroot, train_sets,
                                  preproc(img_dim, rgb_means, p),
                                  CustomAnnotationTransform())
        dataset_512 = CustomDetection(CUSTOMroot, train_sets,
                                      preproc(512, rgb_means, p),
                                      CustomAnnotationTransform())
    else:
        print('Only VOC and COCO are supported now!')
        return
    if dataset_512 is None:
        dataset_512 = dataset

    epoch_size = len(dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size

    # LR decay milestones, expressed in iterations.
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size
                      )  # (80000,100000,120000)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0

    lr = args.lr
    image_size = 0
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator; input size is currently pinned to 512
            image_size = ('300', '512')[1]  #[random.randint(0,1)]
            batch_iterator = iter(
                data.DataLoader((dataset, dataset_512)[image_size == '512'],
                                batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            priorbox = PriorBox((VOC_300_2, VOC_512_3)[image_size == '512'])
            # torch.no_grad() replaces the removed Variable(volatile=True).
            with torch.no_grad():
                priors = priorbox.forward()
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)

        if args.cuda:
            images = images.cuda()
            targets = [anno.cuda() for anno in targets]
        # forward
        load_t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        load_t1 = time.time()
        # fix: `.data[0]` indexing on 0-dim loss tensors was removed in
        # PyTorch 0.4+; `.item()` is the supported accessor.
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()

        if iteration % 100 == 0:
            print('Epoch:' + repr(epoch) + ' || image-size:' +
                  repr(image_size) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Totel iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f||' % (loss_l.item(), loss_c.item()) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))
        # Checkpoint every 10k iterations, then every 1k beyond 110k.
        if iteration <= 110000 and (iteration == 0 or iteration % 10000 == 0):
            print('Saving state, iter:', iteration)
            torch.save(net.state_dict(),
                       'weights/ssd300_2_VOC_' + repr(iteration) + '.pth')
        elif (iteration > 110000) and iteration % 1000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(net.state_dict(),
                       'weights/ssd300_2_VOC_' + repr(iteration) + '.pth')

    torch.save(
        net.state_dict(), args.save_folder + 'Final_' + args.version + '_' +
        args.dataset + '.pth')
def train(cfg):
    """Train M2Det on the Helmet dataset described by a config file.

    NOTE(review): this redefines ``train``, shadowing the zero-argument
    version defined earlier in this file.

    Args:
        cfg: path to an mmcv-style config file (parsed with
            ``Config.fromfile``).

    Side effects: saves checkpoints to ``checkpoint_path.format(epoch)``
    periodically, on KeyboardInterrupt, and after the final epoch. Relies
    on module-level ``device``, ``checkpoint_path`` and ``start_epoch``.
    """
    cfg = Config.fromfile(cfg)
    net = build_net('train',
                    size=cfg.model.input_size,  # Only 320, 512, 704 and 800 are supported
                    config=cfg.model.m2det_config)
    init_net(net, cfg, False)
    net.to(device)
    # Resume from an existing checkpoint for the starting epoch, if any.
    if os.path.exists(checkpoint_path.format(start_epoch)):
        checkpoints = torch.load(checkpoint_path.format(start_epoch))
        net.load_state_dict(checkpoints)
        logging.info('checkpoint loaded.')

    optimizer = optim.SGD(net.parameters(),
                          lr=cfg.train_cfg.lr[0],
                          momentum=cfg.optimizer.momentum,
                          weight_decay=cfg.optimizer.weight_decay)
    criterion = MultiBoxLoss(cfg.model.m2det_config.num_classes,
                             overlap_thresh=cfg.loss.overlap_thresh,
                             prior_for_matching=cfg.loss.prior_for_matching,
                             bkg_label=cfg.loss.bkg_label,
                             neg_mining=cfg.loss.neg_mining,
                             neg_pos=cfg.loss.neg_pos,
                             neg_overlap=cfg.loss.neg_overlap,
                             encode_target=cfg.loss
                             .encode_target)
    # Pre-compute the anchor priors once; no gradients needed.
    priorbox = PriorBox(anchors(cfg))
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    net.train()

    anchor_config = anchors(cfg)
    detector = Detect(cfg.model.m2det_config.num_classes,
                      cfg.loss.bkg_label, anchor_config)
    logging.info('detector initiated.')

    dataset = get_dataloader(cfg, 'Helmet', 'train_sets')
    train_ds = DataLoader(dataset, cfg.train_cfg.per_batch_size,
                          shuffle=True,
                          num_workers=0,
                          collate_fn=detection_collate)
    logging.info('dataset loaded, start to train...')

    # NOTE(review): if start_epoch >= cfg.model.epochs the loop body never
    # runs and the final torch.save references an unbound `epoch`.
    for epoch in range(start_epoch, cfg.model.epochs):
        for i, data in enumerate(train_ds):
            try:
                lr = adjust_learning_rate_helmet(optimizer, epoch, cfg)
                images, targets = data
                images = images.to(device)
                targets = [anno.to(device) for anno in targets]
                out = net(images)

                optimizer.zero_grad()
                loss_l, loss_c = criterion(out, priors, targets)
                loss = loss_l + loss_c
                loss.backward()
                optimizer.step()

                if i % 30 == 0:
                    logging.info('Epoch: {}, iter: {}, loc_loss: {}, conf_loss: {}, loss: {}, lr: {}'.format(
                        epoch, i, loss_l.item(), loss_c.item(), loss.item(), lr
                    ))

                # Periodic checkpoint (also fires at i == 0 each epoch).
                if i % 2000 == 0:
                    # two_imgs = images[0:2, :]
                    # out = net(two_imgs)
                    # snap_middle_result(two_imgs[0], out[0], priors, detector, cfg, epoch)
                    torch.save(net.state_dict(), checkpoint_path.format(epoch))
                    logging.info('model saved.')
            except KeyboardInterrupt:
                # Save before exiting so the run can be resumed.
                torch.save(net.state_dict(), checkpoint_path.format(epoch))
                logging.info('model saved.')
                exit(0)
    torch.save(net.state_dict(), checkpoint_path.format(epoch))
class Solver(object):
    """
    A wrapper class for the training process.

    Builds data loaders, the detection model, prior (anchor) boxes, the
    optimizer, LR scheduler and multi-box loss from the module-level ``cfg``,
    then drives training/evaluation epochs with periodic checkpointing.
    """
    def __init__(self):
        self.cfg = cfg

        # Load data
        print('===> Loading data')
        self.train_loader = load_data(
            cfg.dataset, 'train') if 'train' in cfg.phase else None
        self.eval_loader = load_data(cfg.dataset,
                                     'eval') if 'eval' in cfg.phase else None
        self.test_loader = load_data(cfg.dataset,
                                     'test') if 'test' in cfg.phase else None
        # self.visualize_loader = load_data(cfg.DATASET, 'visualize') if 'visualize' in cfg.PHASE else None

        # Build model
        print('===> Building model')
        self.base_trans = BaseTransform(cfg.image_size[0],
                                        cfg.network.rgb_means,
                                        cfg.network.rgb_std, (2, 0, 1))
        self.priors = PriorBox(cfg.anchor)
        # cfg.model names the module that provides build_net; resolved at runtime.
        self.model = eval(cfg.model + '.build_net')(cfg.image_size[0],
                                                    cfg.dataset.num_classes)
        with torch.no_grad():
            # Replace the PriorBox helper with the generated anchor tensor.
            self.priors = self.priors.forward()
        self.detector = Detect2(cfg.post_process)
        # Utilize GPUs for computation
        self.use_gpu = torch.cuda.is_available()
        if cfg.train.train_scope == '':
            trainable_param = self.model.parameters()
        else:
            trainable_param = self.trainable_param(cfg.train.train_scope)
        self.output_dir = os.path.join(cfg.output_dir, cfg.name, cfg.date)
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)
        self.log_dir = os.path.join(self.output_dir, 'logs')
        if not os.path.exists(self.log_dir):
            os.makedirs(self.log_dir)
        self.checkpoint = cfg.train.checkpoint

        previous = self.find_previous()
        # NOTE(review): auto-resume is force-disabled by the line below;
        # remove it to resume from the latest entry in checkpoint_list.txt.
        previous = False
        if previous:
            self.start_epoch = previous[0][-1]
            self.resume_checkpoint(previous[1][-1])
        else:
            self.start_epoch = self.initialize()
        if self.use_gpu:
            print('Utilize GPUs for computation')
            print('Number of GPU available', torch.cuda.device_count())
            self.model.cuda()
            self.priors.cuda()
            cudnn.benchmark = True
            if cfg.ngpu > 1:
                self.model = torch.nn.DataParallel(self.model,
                                                   device_ids=list(
                                                       range(cfg.ngpu)))
        # Print the model architecture and parameters
        #print('Model architectures:\n{}\n'.format(self.model))

        #print('Parameters and size:')
        #for name, param in self.model.named_parameters():
        #    print('{}: {}'.format(name, list(param.size())))
        # print trainable scope
        print('Trainable scope: {}'.format(cfg.train.train_scope))
        self.optimizer = self.configure_optimizer(trainable_param,
                                                  cfg.train.optimizer)
        self.exp_lr_scheduler = self.configure_lr_scheduler(
            self.optimizer, cfg.train.lr_scheduler)
        self.max_epochs = cfg.train.lr_scheduler.max_epochs
        # metric
        if cfg.network.multi_box_loss_type == 'origin':
            self.criterion = MultiBoxLoss2(cfg.matcher, self.priors,
                                           self.use_gpu)
        else:
            # Fixed: the attribute lives under cfg.network (matching the check
            # above); cfg.multi_box_loss_type raised AttributeError here.
            print('ERROR: ' + cfg.network.multi_box_loss_type +
                  ' is not supported')
            sys.exit()
        # Set the logger
        self.writer = SummaryWriter(log_dir=self.log_dir)
        self.checkpoint_prefix = cfg.name + '_' + cfg.dataset.dataset

    def save_checkpoints(self, epochs, iters=None):
        """Save model weights and append an ``epoch N: <path>`` record that
        find_previous() can parse back for resuming."""
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)
        if iters:
            filename = self.checkpoint_prefix + '_epoch_{:d}_iter_{:d}'.format(
                epochs, iters) + '.pth'
        else:
            filename = self.checkpoint_prefix + '_epoch_{:d}'.format(
                epochs) + '.pth'
        filename = os.path.join(self.output_dir, filename)
        torch.save(self.model.state_dict(), filename)
        with open(os.path.join(self.output_dir, 'checkpoint_list.txt'),
                  'a') as f:
            # Fixed: record the actual checkpoint path. The previous literal
            # '(unknown)' made the entry useless to find_previous(), which
            # reads the path after ': '.
            f.write('epoch {epoch:d}: {filename}\n'.format(epoch=epochs,
                                                           filename=filename))
        print('Wrote snapshot to: {:s}'.format(filename))

        # TODO: write relative cfg under the same page

    def resume_checkpoint(self, resume_checkpoint):
        """Load weights from ``resume_checkpoint`` into the model, tolerating
        DataParallel 'module.' prefixes and restricting to the configured
        resume scope. Returns False when the file is missing."""
        if resume_checkpoint == '' or not os.path.isfile(resume_checkpoint):
            print(("=> no checkpoint found at '{}'".format(resume_checkpoint)))
            return False
        print(("=> loading checkpoint '{:s}'".format(resume_checkpoint)))
        checkpoint = torch.load(resume_checkpoint)

        # print("=> Weigths in the checkpoints:")
        # print([k for k, v in list(checkpoint.items())])

        # remove the module in the parrallel model
        if 'module.' in list(checkpoint.items())[0][0]:
            pretrained_dict = {
                '.'.join(k.split('.')[1:]): v
                for k, v in list(checkpoint.items())
            }
            checkpoint = pretrained_dict

        resume_scope = self.cfg.train.resume_scope
        # extract the weights based on the resume scope
        if resume_scope != '':
            pretrained_dict = {}
            for k, v in list(checkpoint.items()):
                for resume_key in resume_scope.split(','):
                    if resume_key in k:
                        pretrained_dict[k] = v
                        break
            checkpoint = pretrained_dict

        # keep only keys the current model actually has
        pretrained_dict = {
            k: v
            for k, v in checkpoint.items() if k in self.model.state_dict()
        }
        # print("=> Resume weigths:")
        # print([k for k, v in list(pretrained_dict.items())])

        checkpoint = self.model.state_dict()

        unresume_dict = set(checkpoint) - set(pretrained_dict)
        if len(unresume_dict) != 0:
            print("=> UNResume weigths:")
            print(unresume_dict)

        checkpoint.update(pretrained_dict)

        return self.model.load_state_dict(checkpoint)

    def find_previous(self):
        """Parse checkpoint_list.txt and return (epochs, checkpoint_paths),
        or False when no checkpoint list exists yet."""
        if not os.path.exists(
                os.path.join(self.output_dir, 'checkpoint_list.txt')):
            return False
        with open(os.path.join(self.output_dir, 'checkpoint_list.txt'),
                  'r') as f:
            lineList = f.readlines()
        epoches, resume_checkpoints = [list() for _ in range(2)]
        for line in lineList:
            # lines have the form written by save_checkpoints: 'epoch N: <path>'
            epoch = int(line[line.find('epoch ') +
                             len('epoch '):line.find(':')])
            checkpoint = line[line.find(':') + 2:-1]
            epoches.append(epoch)
            resume_checkpoints.append(checkpoint)
        return epoches, resume_checkpoints

    def weights_init(self, m):
        """Kaiming-init conv weights, set BN weights to 1 and all biases to 0."""
        for key in m.state_dict():
            if key.split('.')[-1] == 'weight':
                if 'conv' in key:
                    # Fixed: init.kaiming_normal is deprecated since
                    # PyTorch 0.4 in favour of the in-place variant.
                    init.kaiming_normal_(m.state_dict()[key], mode='fan_out')
                if 'bn' in key:
                    m.state_dict()[key][...] = 1
            elif key.split('.')[-1] == 'bias':
                m.state_dict()[key][...] = 0

    def initialize(self):
        """Initialize weights (from cfg.train.checkpoint when set, otherwise
        from the basenet) and return the epoch to start training from."""
        # TODO: ADD INIT ways
        # raise ValueError("Fan in and fan out can not be computed for tensor with less than 2 dimensions")
        # for module in self.cfg.TRAIN.TRAINABLE_SCOPE.split(','):
        #     if hasattr(self.model, module):
        #         getattr(self.model, module).apply(self.weights_init)
        if self.checkpoint:
            print('Loading initial model weights from {:s}'.format(
                self.checkpoint))
            self.resume_checkpoint(self.checkpoint)
            return cfg.train.resume_epoch
        else:
            self.model.init_model(cfg.network.basenet)
            return 0

    def trainable_param(self, trainable_scope):
        """Freeze all parameters, then unfreeze and return only those inside
        the comma-separated submodule names of ``trainable_scope``."""
        for param in self.model.parameters():
            param.requires_grad = False

        trainable_param = []
        for module in trainable_scope.split(','):
            if hasattr(self.model, module):
                # print(getattr(self.model, module))
                for param in getattr(self.model, module).parameters():
                    param.requires_grad = True
                trainable_param.extend(
                    getattr(self.model, module).parameters())

        return trainable_param

    def train_model(self):
        """Main loop: step the LR scheduler, run train/eval epochs per
        cfg.phase and save checkpoints at the configured frequency."""

        # export graph for the model, onnx always not works
        # self.export_graph()

        # warm_up epoch
        for epoch in iter(range(self.start_epoch + 1, self.max_epochs + 1)):
            #learning rate
            sys.stdout.write('\rEpoch {epoch:d}/{max_epochs:d}:\n'.format(
                epoch=epoch, max_epochs=self.max_epochs))
            self.exp_lr_scheduler.step(epoch - cfg.train.lr_scheduler.warmup)
            if 'train' in cfg.phase:
                self.train_epoch(self.model, self.train_loader, self.optimizer,
                                 self.criterion, self.writer, epoch,
                                 self.use_gpu)
            if 'eval' in cfg.phase and epoch % cfg.test_frequency == 0:
                # NOTE(review): this argument list does not match
                # eval_epoch's signature (model, data_loader, detector,
                # output_dir, use_gpu) -- the call raises TypeError if
                # reached; eval_epoch needs repair before enabling 'eval'.
                self.eval_epoch(self.model, self.eval_loader, self.detector,
                                self.criterion, self.writer, epoch,
                                self.use_gpu)
            #if 'test' in cfg.PHASE:
            #    self.test_epoch(self.model, self.test_loader, self.detector, self.output_dir, self.use_gpu)
            #if 'visualize' in cfg.PHASE:
            #    self.visualize_epoch(self.model, self.visualize_loader, self.priorbox, self.writer, epoch,  self.use_gpu)

            if epoch % cfg.train.save_frequency == 0:
                self.save_checkpoints(epoch)

    def train_epoch(self, model, data_loader, optimizer, criterion, writer,
                    epoch, use_gpu):
        """Run one training epoch: forward, multi-box loss, backward, step;
        log per-iteration to stdout and per-epoch to TensorBoard."""
        model.train()

        epoch_size = len(data_loader)
        batch_iterator = iter(data_loader)

        loc_loss = 0
        conf_loss = 0
        _t = Timer()

        for iteration in iter(range((epoch_size))):
            with torch.no_grad():
                images, targets = next(batch_iterator)
                if use_gpu:
                    images = images.cuda()
                    targets = [anno.cuda() for anno in targets]
            _t.tic()
            # forward
            out = model(images)

            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)

            # some bugs in coco train2017. maybe the annonation bug.
            if loss_l.item() == float("Inf"):
                continue

            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()

            time = _t.toc()
            loc_loss += loss_l.item()
            conf_loss += loss_c.item()

            # log per iter
            log = '\r==>Train: || {iters:d}/{epoch_size:d} in {time:.3f}s [{prograss}] || loc_loss: {loc_loss:.4f} cls_loss: {cls_loss:.4f}\r'.format(
                prograss='#' * int(round(10 * iteration / epoch_size)) +
                '-' * int(round(10 * (1 - iteration / epoch_size))),
                iters=iteration,
                epoch_size=epoch_size,
                time=time,
                loc_loss=loss_l.item(),
                cls_loss=loss_c.item())

            sys.stdout.write(log)
            sys.stdout.flush()

        # log per epoch
        sys.stdout.write('\r')
        sys.stdout.flush()
        lr = optimizer.param_groups[0]['lr']
        log = '\r==>Train: || Total_time: {time:.3f}s || loc_loss: {loc_loss:.4f} conf_loss: {conf_loss:.4f} || lr: {lr:.6f}\n'.format(
            lr=lr,
            time=_t.total_time,
            loc_loss=loc_loss / epoch_size,
            conf_loss=conf_loss / epoch_size)
        sys.stdout.write(log)
        sys.stdout.flush()

        # log for tensorboard
        writer.add_scalar('Train/loc_loss', loc_loss / epoch_size, epoch)
        writer.add_scalar('Train/conf_loss', conf_loss / epoch_size, epoch)
        writer.add_scalar('Train/lr', lr, epoch)

    def eval_epoch(self, model, data_loader, detector, output_dir, use_gpu):
        """Run detection over the eval set, apply per-class NMS and evaluate.

        NOTE(review): this method references several names that are not
        defined in this scope (testset, transform, cuda, net, save_folder,
        max_per_image, args), and its signature does not match the call in
        train_model(). It appears to be pasted from a standalone test script
        and needs repair before use; the body is kept unchanged.
        """

        model.eval()
        dataset = data_loader.dataset
        num_images = len(testset)
        num_classes = cfg.dataset.num_classes
        # all_boxes[class][image] -> Nx5 array of (x1, y1, x2, y2, score)
        all_boxes = [[[] for _ in range(num_images)]
                     for _ in range(num_classes)]

        _t = {'im_detect': Timer(), 'misc': Timer()}
        det_file = os.path.join(self.output_dir, 'detections.pkl')

        if cfg.test.retest:
            # re-evaluate previously dumped detections instead of re-running
            f = open(det_file, 'rb')
            all_boxes = pickle.load(f)
            print('Evaluating detections')
            testset.evaluate_detections(all_boxes, save_folder)
            return

        for i in range(num_images):
            img = testset.pull_image(i)
            with torch.no_grad():
                x = transform(img).unsqueeze(0)
            if cuda:
                x = x.to(torch.device("cuda"))

            _t['im_detect'].tic()
            out = net(x=x, test=True)  # forward pass
            boxes, scores = detector.forward(out, self.priors)
            detect_time = _t['im_detect'].toc()
            boxes = boxes[0]
            scores = scores[0]

            boxes = boxes.cpu().numpy()
            scores = scores.cpu().numpy()
            # scale each detection back up to the image
            scale = torch.Tensor(
                [img.shape[1], img.shape[0], img.shape[1],
                 img.shape[0]]).cpu().numpy()
            boxes *= scale

            _t['misc'].tic()

            # per-class score threshold + NMS (class 0 is background)
            for j in range(1, num_classes):
                inds = np.where(
                    scores[:, j] > cfg.post_process.score_threshold)[0]
                if len(inds) == 0:
                    all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                    continue
                c_bboxes = boxes[inds]
                c_scores = scores[inds, j]
                c_dets = np.hstack(
                    (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
                keep = nms(c_dets, cfg.post_process.nms, force_cpu=False)
                c_dets = c_dets[keep, :]
                all_boxes[j][i] = c_dets
            # keep only the top-scoring detections per image when capped
            if cfg.post_process.max_per_image > 0:
                image_scores = np.hstack(
                    [all_boxes[j][i][:, -1] for j in range(1, num_classes)])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, num_classes):
                        keep = np.where(
                            all_boxes[j][i][:, -1] >= image_thresh)[0]
                        all_boxes[j][i] = all_boxes[j][i][keep, :]

            nms_time = _t['misc'].toc()

            if i % 20 == 0:
                print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                    i + 1, num_images, detect_time, nms_time))
                _t['im_detect'].clear()
                _t['misc'].clear()

        with open(det_file, 'wb') as f:
            pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

        print('Evaluating detections')
        if args.dataset == 'VOC':
            APs, mAP = testset.evaluate_detections(all_boxes, save_folder)
        else:
            testset.evaluate_detections(all_boxes, save_folder)

    def configure_optimizer(self, trainable_param, cfg):
        """Build the optimizer named by cfg.optimizer (sgd/rmsprop/adam).

        Raises ValueError for an unknown optimizer name.
        """
        if cfg.optimizer == 'sgd':
            optimizer = optim.SGD(trainable_param,
                                  lr=cfg.lr,
                                  momentum=cfg.momentum,
                                  weight_decay=cfg.weight_decay)
        elif cfg.optimizer == 'rmsprop':
            optimizer = optim.RMSprop(trainable_param,
                                      lr=cfg.lr,
                                      momentum=cfg.momentum,
                                      alpha=cfg.alpha,
                                      eps=cfg.eps,
                                      weight_decay=cfg.weight_decay)
        elif cfg.optimizer == 'adam':
            optimizer = optim.Adam(trainable_param,
                                   lr=cfg.lr,
                                   betas=(cfg.beta1, cfg.beta2),
                                   eps=cfg.eps,
                                   weight_decay=cfg.weight_decay)
        else:
            # Fixed: the original built an AssertionError without raising it,
            # which then hit a NameError on the return below.
            raise ValueError('optimizer can not be recognized.')
        return optimizer

    def configure_lr_scheduler(self, optimizer, cfg):
        """Build the LR scheduler named by cfg.lr_decay_type
        (multi-step/exponential/cos).

        Raises ValueError for an unknown scheduler name.
        """
        if cfg.lr_decay_type == 'multi-step':
            scheduler = lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=cfg.steps,
                                                 gamma=cfg.gamma)
        elif cfg.lr_decay_type == 'exponential':
            scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=cfg.gamma)
        elif cfg.lr_decay_type == 'cos':
            scheduler = lr_scheduler.CosineAnnealingLR(optimizer,
                                                       T_max=cfg.max_epochs)
        else:
            # Fixed: same silent no-op AssertionError as configure_optimizer.
            raise ValueError('scheduler can not be recognized.')
        return scheduler

    #TODO: export graph
    def export_graph(self):
        pass
Exemple #11
0
def main():
    """Evaluate trained AMTNet checkpoints over a grid of evaluation gaps.

    Mutates the module-level ``args`` namespace in place (save_root,
    data_root, eval_gap, exp_name, ...), builds the test dataset and
    network once per eval gap, then runs test_net() for every requested
    training iteration, logging per-class APs and mAP to a log file.

    NOTE(review): relies on module globals not visible here (args, v2,
    CLASSES, PriorBox, ActionDetection, BaseTransform, NormliseBoxes,
    AMTNet, test_net) -- confirm against the enclosing module.
    """

    means = (104, 117, 123)  # only support voc now
    # args paths are extended in place; calling main() twice would
    # append the dataset segment twice.
    args.save_root += args.dataset + '/'
    args.data_root += args.dataset + '/'
    for eval_gap in [int(g) for g in args.eval_gaps.split(',')]:
        args.eval_gap = eval_gap
        

        args.print_step = 10
        args.fusion_type = args.fusion_type.upper()
        args.fusion = args.fusion_type in ['SUM','CAT','MEAN']
        ## Define the experiment Name will used for save directory and ENV for visdom
        if not args.fusion:
            args.exp_name = 'AMTNet-{}-s{:d}-{}-sl{:02d}sg{:02d}-bs{:02d}-lr{:05d}'.format(args.dataset, args.train_split,
                                                                                    args.input_type_base.upper(),
                                                                                    args.seq_len, args.seq_gap, 
                                                                                    args.batch_size, int(args.lr * 100000))
        else:
            args.exp_name = 'AMTNet-{}-s{:d}-{}-{}-{}-sl{:02d}sg{:02d}-bs{:02d}-lr{:05d}'.format(args.dataset, args.train_split,
                                                                                    args.fusion_type, args.input_type_base,
                                                                                    args.input_type_extra,
                                                                                    args.seq_len, args.seq_gap, 
                                                                                    args.batch_size,int(args.lr * 100000))
        print(args.exp_name, ' eg::=> ', eval_gap)
    

        args.cfg = v2
        args.num_classes = len(CLASSES[args.dataset]) + 1  # 7 +1 background
        
        # Get proior or anchor boxes
        with torch.no_grad():
            priorbox = PriorBox(v2, args.seq_len)
            priors = priorbox.forward()
            priors = priors.cuda()
            num_feat_multiplier = {'CAT': 2, 'SUM': 1, 'MEAN': 1, 'NONE': 1}
            # fusion type can one of the above keys
            # fmd: per-scale feature-map depths fed to the head
            args.fmd = [512, 1024, 512, 256, 256, 256]
            args.kd = 3
            args.fusion_num_muliplier = num_feat_multiplier[args.fusion_type]

            dataset = ActionDetection(args, 'test', BaseTransform(args.ssd_dim, means), NormliseBoxes(), full_test=False)

            ## DEFINE THE NETWORK
            net = AMTNet(args)
            if args.ngpu>1:
                print('\nLets do dataparallel\n\n')
                net = torch.nn.DataParallel(net)
        
                # Load dataset

            # Evaluate each requested training snapshot with this eval gap.
            for iteration in [int(it) for it in args.eval_iters.split(',')]:
                fname = args.save_root + 'cache/' + args.exp_name + "/testing-{:d}-eg{:d}.log".format(iteration, eval_gap)
                # line-buffered log so progress is visible while running
                log_file = open(fname, "w", 1)
                log_file.write(args.exp_name + '\n')
                print(fname)
                trained_model_path = args.save_root + 'cache/' + args.exp_name + '/AMTNet_' + repr(iteration) + '.pth'
                log_file.write(trained_model_path+'\n')
                # trained_model_path = '/mnt/sun-alpha/ss-workspace/CVPR2018_WORK/ssd.pytorch_exp/UCF24/guru_ssd_pipeline_weights/ssd300_ucf24_90000.pth'

                net.load_state_dict(torch.load(trained_model_path))
                print('Finished loading model %d !' % iteration)
                net.eval()
                net = net.cuda()
                
                # evaluation
                # synchronize so the wall-clock timing below is meaningful
                torch.cuda.synchronize()
                tt0 = time.perf_counter()
                log_file.write('Testing net \n')
                
                mAP, ap_all, ap_strs = test_net(net, priors, args, dataset, iteration)
                for ap_str in ap_strs:
                    print(ap_str)
                    log_file.write(ap_str + '\n')
                ptr_str = '\nMEANAP:::=>' + str(mAP) + '\n'
                print(ptr_str)
                log_file.write(ptr_str)
                torch.cuda.synchronize()
                print('Complete set time {:0.2f}'.format(time.perf_counter() - tt0))
                log_file.close()
Exemple #12
0
def train(args):
    """Train an SSD-family detector selected by ``args.version``.

    Picks the model builder and anchor config from args.version, builds the
    loss named by args.loss, then runs the iteration-based SGD training loop
    over VOC-style data with step LR decay, TensorBoard logging and periodic
    checkpointing into args.save_folder.

    NOTE(review): relies on module globals not visible here (num_classes,
    DATASET, VOCroot, rgb_means, p, momentum, weight_decay, gamma, ...).
    """
    cfg = (VOC_300, VOC_512)[args.size == '512']
    if args.version == 'SSD_VGG_Mobile_Little':
        from models.SSD_VGG_Mobile_Little import build_net
        cfg = VEHICLE_240
    elif args.version == 'SSD_VGG_Optim_FPN_RFB':
        from models.SSD_VGG_Optim_FPN_RFB import build_net
    elif args.version == 'SSD_ResNet_FPN':
        from models.SSD_ResNet_FPN import build_net
    elif args.version == 'SSD_HRNet':
        from models.SSD_HRNet import build_net
    elif args.version == 'EfficientDet':
        from models.EfficientDet import build_net
    elif args.version == 'SSD_DetNet':
        from models.SSD_DetNet import build_net
        cfg = DetNet_300
    elif args.version == 'SSD_M2Det':
        from models.SSD_M2Det import build_net
        cfg = M2Det_320
    elif args.version == 'SSD_Pelee':
        from models.SSD_Pelee import build_net
    else:
        args.version = 'SSD_VGG_RFB'
        from models.SSD_VGG_RFB import build_net

    if args.loss == "OHEM":
        criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5,
                                 False)
    elif args.loss == "GIOU":
        criterion = GIOUMultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5,
                                     False)
    elif args.loss == "DIOU":
        criterion = GIOUMultiBoxLoss(num_classes,
                                     0.5,
                                     True,
                                     0,
                                     True,
                                     3,
                                     0.5,
                                     False,
                                     loss_name='Diou')
    elif args.loss == "CIOU":
        criterion = GIOUMultiBoxLoss(num_classes,
                                     0.5,
                                     True,
                                     0,
                                     True,
                                     3,
                                     0.5,
                                     False,
                                     loss_name='Ciou')
    elif args.loss == "FocalLoss":
        criterion = FocalLossMultiBoxLoss(num_classes, 0.5, True, 0, True, 3,
                                          0.5, False, args.anchor)
    else:
        # Previously an unknown loss fell through silently and `criterion`
        # stayed unbound, producing a NameError deep in the training loop.
        raise ValueError('unsupported loss: {}'.format(args.loss))

    if 'withneg' in DATASET:
        train_sets = [
            (DATASET.replace('_withneg', ''), 'trainval_withneg'),
        ]
    else:
        train_sets = [
            (DATASET.replace('_withneg', ''), 'trainval'),
        ]

    if args.resume_epoch == 0:
        args.save_folder = os.path.join(
            args.save_folder, DATASET, args.version, args.loss + '_' +
            args.anchor + '_' + args.fpn_type + '_bz' + str(args.bz))
        if not os.path.exists(args.save_folder):
            os.makedirs(args.save_folder)
    else:
        # resuming: keep saving next to the checkpoint we resume from
        args.save_folder = Path(args.resume_net).parent

    try:
        net = build_net('train', cfg['min_dim'], num_classes, args.fpn_type)
    except TypeError:
        # Some model variants' build_net does not take fpn_type
        # (narrowed from a bare except that hid real construction errors).
        net = build_net('train', cfg['min_dim'], num_classes)

    print(args.save_folder)
    try:
        flops, params = get_model_complexity_info(
            net, (cfg['min_dim'], cfg['min_dim']), print_per_layer_stat=False)
        print('FLOPs:', flops, 'Params:', params)
    except Exception:
        # Complexity reporting is best-effort only; never abort training.
        pass

    init_net(net, args.resume_net
             )  # init the network with pretrained weights or resumed weights

    if args.ngpu > 1:
        net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))
    if args.cuda:
        net.cuda()
        cudnn.benchmark = True

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=momentum,
                          weight_decay=weight_decay)

    # anchors are constant, so generate them outside autograd
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
        if args.cuda:
            priors = priors.cuda()

    dataset = VOCDetection(VOCroot, train_sets,
                           preproc(cfg['min_dim'], rgb_means, p),
                           AnnotationTransform())
    len_dataset = len(dataset)
    epoch_size = len_dataset // args.bz
    max_iter = args.max_epoch * epoch_size
    print(train_sets, 'len_dataset:', len_dataset, 'max_iter:', max_iter)

    # LR decay milestones expressed in iterations
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues = stepvalues_VOC
    step_index = 0
    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0
    # when resuming mid-schedule, recover the LR step we should be on
    if start_iter > stepvalues[0] and start_iter < stepvalues[1]:
        step_index = 1
    elif start_iter > stepvalues[1] and start_iter < stepvalues[2]:
        step_index = 2
    elif start_iter > stepvalues[2]:
        step_index = 3

    net.train()
    writer = SummaryWriter(args.save_folder)
    loc_loss = 0
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # new epoch: rebuild the shuffled loader and reset running losses
            batch_iterator = iter(
                data.DataLoader(dataset,
                                args.bz,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate,
                                pin_memory=True))
            loc_loss = 0
            conf_loss = 0
            # save every 5 epochs (the original duplicate `epoch > 200`
            # clause was a redundant subset of `epoch > 0`)
            if epoch % 5 == 0 and epoch > 0:
                torch.save(net.state_dict(),
                           os.path.join(args.save_folder,
                                        str(epoch) + '.pth'))
            epoch += 1

        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(args.lr, optimizer, gamma, epoch, step_index,
                                  iteration, epoch_size)

        images, targets = next(batch_iterator)
        # print(np.sum([torch.sum(anno[:,-1] == 2) for anno in targets]))

        if args.cuda:
            # Variable() has been a no-op since PyTorch 0.4 (this file
            # already uses torch.no_grad()); move tensors to GPU directly.
            images = images.cuda()
            targets = [anno.cuda() for anno in targets]

        out = net(images)

        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()

        loc_loss += loss_l.item()
        conf_loss += loss_c.item()

        if iteration % 10 == 0:
            print('Epoch:' + repr(epoch) + '||EpochIter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '||Totel iter ' + repr(iteration) + '||L: %.4f C: %.4f' %
                  (loss_l.item(), loss_c.item()) + '||LR: %.8f' % (lr))
            writer.add_scalar('Train/total_loss',
                              (loss_l.item() + loss_c.item()), iteration)
            writer.add_scalar('Train/loc_loss', loss_l.item(), iteration)
            writer.add_scalar('Train/conf_loss', loss_c.item(), iteration)
            writer.add_scalar('Train/lr', lr, iteration)

    # final snapshot after the full schedule
    torch.save(net.state_dict(),
               os.path.join(args.save_folder,
                            str(args.max_epoch) + '.pth'))
Exemple #13
0
    net = torch.nn.DataParallel(net, device_ids=args.gpu_id)

# --- training setup (script-level): device, optimizer, loss, anchors, data ---
if args.cuda:
    net.cuda()
    cudnn.benchmark = True

optimizer = optim.SGD(net.parameters(),
                      lr=args.lr,
                      momentum=args.momentum,
                      weight_decay=args.weight_decay)
#optimizer = optim.RMSprop(net.parameters(), lr=args.lr,alpha = 0.9, eps=1e-08,
#                     momentum=args.momentum, weight_decay=args.weight_decay)

criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)
priorbox = PriorBox(cfg)
# Anchors are constant: generate them without autograd bookkeeping.
# Replaces `Variable(priorbox.forward(), volatile=True)` -- the `volatile`
# flag was removed in PyTorch 0.4 and torch.no_grad() is its successor
# (already used elsewhere in this file).
with torch.no_grad():
    priors = priorbox.forward()
#dataset
print('Loading Dataset...')
if args.dataset == 'VOC':
    testset = VOCDetection(VOCroot, [('2007', 'test')], None,
                           AnnotationTransform())
    train_dataset = VOCDetection(VOCroot, train_sets,
                                 preproc(img_dim, rgb_means, p, rgb_std),
                                 AnnotationTransform())
elif args.dataset == 'COCO':
    testset = COCODetection(COCOroot, [('2014', 'minival')], None)
    train_dataset = COCODetection(COCOroot, train_sets,
                                  preproc(img_dim, rgb_means, p, rgb_std))
else:
    print('Only VOC and COCO are supported now!')
    exit()
Exemple #14
0
    net.load_state_dict(new_state_dict)

# --- training setup (script-level): multi-GPU, detector, optimizer, loss, data ---
if args.ngpu > 1:
    net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

if args.cuda:
    net.cuda()
    cudnn.benchmark = True

# Detect(num_classes, bkg_label=0, cfg): decodes raw network output into boxes
detector = Detect(num_classes, 0, cfg)
optimizer = optim.SGD(net.parameters(), lr=args.lr,
                      momentum=args.momentum, weight_decay=args.weight_decay)

criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)
priorbox = PriorBox(cfg)
# NOTE(review): Variable() has been a no-op wrapper since PyTorch 0.4;
# elsewhere this file uses `with torch.no_grad(): priors = priorbox.forward()`
# for the same purpose -- consider aligning.
priors = Variable(priorbox.forward())
# dataset
print('Loading Dataset...')
if args.dataset == 'VOC':
    testset = VOCDetection(
        VOCroot, [('2007', 'test')], None, AnnotationTransform())
    train_dataset = VOCDetection(VOCroot, train_sets, preproc(
        img_dim, rgb_means, rgb_std, p), AnnotationTransform())
elif args.dataset == 'COCO':
    testset = COCODetection(
        COCOroot, [('2017', 'val')], None)
    #testset = COCODetection(COCOroot, [('2017', 'test-dev')], None)
    train_dataset = COCODetection(COCOroot, train_sets, preproc(
        img_dim, rgb_means, rgb_std, p))
else:
    print('Only VOC and COCO are supported now!')
Exemple #15
0
def demo(v_f):
    """Run M2Det detection on the video file *v_f* and write result.mp4.

    Builds the network from the module-level ``config_f`` and
    ``checkpoint_path``, then reads frames, detects, applies per-class NMS,
    draws surviving boxes, shows them and streams them to the output video.
    """
    # Model / prior-box construction from the config file.
    cfg = Config.fromfile(config_f)
    anchor_config = anchors(cfg)
    priorbox = PriorBox(anchor_config)
    net = build_net('test',
                    size=cfg.model.input_size,
                    config=cfg.model.m2det_config)
    init_net(net, cfg, checkpoint_path)  # load trained weights into net
    net.eval().to(device)
    # Priors are constants; compute them without tracking gradients.
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    _preprocess = BaseTransform(
        cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1))
    detector = Detect(cfg.model.m2det_config.num_classes,
                      cfg.loss.bkg_label, anchor_config)
    logging.info('detector initiated.')

    cap = cv2.VideoCapture(v_f)
    logging.info('detect on: {}'.format(v_f))
    logging.info('video width: {}, height: {}'.format(int(cap.get(3)), int(cap.get(4))))
    # NOTE(review): MJPG fourcc inside an .mp4 container is unusual — some
    # OpenCV builds will fail to encode; confirm codec/container pairing.
    out_video = cv2.VideoWriter("result.mp4", cv2.VideoWriter_fourcc(*'MJPG'), 24, (int(cap.get(3)), int(cap.get(4))))

    while True:
        ret, image = cap.read()
        if not ret:
            # End of stream: release everything and stop.
            out_video.release()
            cv2.destroyAllWindows()
            cap.release()
            break
        w, h = image.shape[1], image.shape[0]
        img = _preprocess(image).unsqueeze(0).to(device)
        scale = torch.Tensor([w, h, w, h])  # map normalized boxes back to pixels
        out = net(img)
        boxes, scores = detector.forward(out, priors)
        boxes = (boxes[0]*scale).cpu().numpy()
        scores = scores[0].cpu().numpy()
        allboxes = []
        # Per-class thresholding + NMS (class 0 is background, skipped).
        for j in range(1, cfg.model.m2det_config.num_classes):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
                np.float32, copy=False)
            soft_nms = cfg.test_cfg.soft_nms
            # min_thresh, device_id=0 if cfg.test_cfg.cuda else None)
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([_.tolist()+[j] for _ in c_dets])
        if len(allboxes) > 0:
            allboxes = np.array(allboxes)
            # [boxes, scores, label_id] -> [id, score, boxes] 0, 1, 2, 3, 4, 5
            allboxes = allboxes[:, [5, 4, 0, 1, 2, 3]]
            logging.info('allboxes shape: {}'.format(allboxes.shape))
            res = visualize_det_cv2(image, allboxes, classes=classes, thresh=0.2)
            # res = visualize_det_cv2_fancy(image, allboxes, classes=classes, thresh=0.2, r=4, d=6)
            cv2.imshow('rr', res)
            out_video.write(res)
            cv2.waitKey(1)
Exemple #16
0
                             c7_channel=args.c7_channel)
     else:
         net = None
     print('loading model!', args.model_dir, args.iteration)
     net.load_state_dict(torch.load(trained_model))
     print(net)
     net.eval()
     print('Finished loading model!', args.model_dir, args.iteration,
           'tub=' + str(args.tub), 'tub_thresh=' + str(args.tub_thresh),
           'tub_score=' + str(args.tub_generate_score))
     detector = Detect(num_classes, 0, args.top_k,
                       args.confidence_threshold, args.nms_threshold)
     priorbox = PriorBox(cfg)
     # priorbox=PriorBox(multi_cfg['2.2'])
     with torch.no_grad():
         priors = priorbox.forward().to(device)
     # load data
     net = net.to(device)
     # evaluation
     test_net(args.save_folder, net, dataset,
              BaseTransform(net.size, dataset_mean), args.top_k, detector,
              priors)
 else:
     out_dir = get_output_dir(
         pkl_dir, args.iteration + '_' + args.dataset_name + '_' +
         args.set_file_name)
     print('Without detection', out_dir)
     do_python_eval(out_dir)
 print('Finished!', args.model_dir, args.iteration, 'tub=' + str(args.tub),
       'tub_thresh=' + str(args.tub_thresh),
       'tub_score=' + str(args.tub_generate_score))
Exemple #17
0
def main():
    """Run single-image detection over an image-set file and dump results.

    Relies on module-level configuration (``deform``, ``ssd_dim``,
    ``num_classes``, ``trained_model``, ``device``, ``img_set``, ``img_root``,
    ``dataset``, ``backbone``, ``refine``, ``top_k``, the thresholds, the
    ``save_dir``/``display`` flags, and the ``results_file``/``det_list``
    output sinks). Detections are optionally visualised and/or saved.
    """
    mean = (104, 117, 123)  # BGR pixel means subtracted by base_transform
    print('loading model!')
    # Choose the deformable or the plain RefineDet VGG backbone.
    if deform:
        from model.dualrefinedet_vggbn import build_net
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        c7_channel=1024,
                        def_groups=deform,
                        multihead=multihead,
                        bn=bn)
    else:
        from model.refinedet_vgg import build_net
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        use_refine=refine,
                        c7_channel=1024,
                        bn=bn)
    net.load_state_dict(torch.load(trained_model))
    net.eval()
    print('Finished loading model!', trained_model)
    net = net.to(device)
    detector = Detect(num_classes, 0, top_k, confidence_threshold,
                      nms_threshold)
    priorbox = PriorBox(cfg)
    # Priors are constants; compute them without tracking gradients.
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    # One image per line of the image-set file; line format depends on dataset.
    for i, line in enumerate(open(img_set, 'r')):
        # if i==10:
        #     break
        if 'COCO' in dataset:
            image_name = line[:-1]
            image_id = int(image_name.split('_')[-1])
        elif 'VOC' in dataset:
            image_name = line[:-1]
            image_id = -1  # VOC results are keyed by name, not a numeric id
        else:
            # presumably "name id" pairs per line — TODO confirm set format
            image_name, image_id = line.split(' ')
            image_id = image_id[:-1]
        print(i, image_name, image_id)
        image_path = os.path.join(img_root, image_name + '.jpg')
        image = cv2.imread(image_path, 1)
        h, w, _ = image.shape
        image_draw = cv2.resize(image.copy(), (640, 480))
        im_trans = base_transform(image, ssd_dim, mean)
        ######################## Detection ########################
        with torch.no_grad():
            # HWC uint8 -> NCHW tensor on the target device.
            x = torch.from_numpy(im_trans).unsqueeze(0).permute(0, 3, 1,
                                                                2).to(device)
            if 'RefineDet' in backbone and refine:
                arm_loc, _, loc, conf = net(x)
            else:
                loc, conf = net(x)
                arm_loc = None
            detections = detector.forward(loc,
                                          conf,
                                          priors,
                                          arm_loc_data=arm_loc)
        ############################################################
        out = list()
        # detections: [batch, class, top_k, 5 or 6]; class 0 is background.
        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            if dets.sum() == 0:
                continue
            # Keep only rows with a positive score.
            mask = dets[:, 0].gt(0.).expand(dets.size(-1), dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, dets.size(-1))
            boxes = dets[:, 1:-1] if dets.size(-1) == 6 else dets[:, 1:]
            # Scale normalized coordinates back to the original image size.
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            boxes_np = boxes.cpu().numpy()

            for b, s in zip(boxes_np, scores):
                if save_dir:
                    out.append(
                        [int(b[0]),
                         int(b[1]),
                         int(b[2]),
                         int(b[3]), j - 1, s])
                    if 'COCO' in dataset:
                        # COCO json expects bbox as [x, y, width, height].
                        det_list.append({
                            'image_id':
                            image_id,
                            'category_id':
                            labelmap[j],
                            'bbox': [
                                float('{:.1f}'.format(b[0])),
                                float('{:.1f}'.format(b[1])),
                                float('{:.1f}'.format(b[2] - b[0] + 1)),
                                float('{:.1f}'.format(b[3] - b[1] + 1))
                            ],
                            'score':
                            float('{:.2f}'.format(s))
                        })
                    else:
                        results_file.write(
                            str(image_id) + ' ' + str(j) + ' ' + str(s) + ' ' +
                            str(np.around(b[0], 2)) + ' ' +
                            str(np.around(b[1], 2)) + ' ' +
                            str(np.around(b[2], 2)) + ' ' +
                            str(np.around(b[3], 2)) + '\n')
                if display:
                    # Draw on the fixed 640x480 preview copy.
                    cv2.rectangle(image_draw,
                                  (int(b[0] / w * 640), int(b[1] / h * 480)),
                                  (int(b[2] / w * 640), int(b[3] / h * 480)),
                                  (0, 255, 0),
                                  thickness=1)

                    cls = class_name[j] if 'COCO' in dataset else str(
                        labelmap[j - 1])
                    put_str = cls + ':' + str(np.around(s, decimals=2))
                    cv2.putText(
                        image_draw,
                        put_str,
                        (int(b[0] / w * 640), int(b[1] / h * 480) - 10),
                        cv2.FONT_HERSHEY_DUPLEX,
                        0.5,
                        color=(0, 255, 0),
                        thickness=1)
        if display:
            cv2.imshow('frame', image_draw)
            ch = cv2.waitKey(0)
            if ch == 115:  # 's' key: persist boxes and images for this frame
                if save_dir:
                    print('save: ', line)
                    torch.save(
                        out, os.path.join(save_dir, '%s.pkl' % str(line[:-1])))
                    cv2.imwrite(
                        os.path.join(save_dir, '%s.jpg' % str(line[:-1])),
                        image)
                    cv2.imwrite(
                        os.path.join(save_dir, '%s_box.jpg' % str(line[:-1])),
                        image_draw)

    cv2.destroyAllWindows()
    if save_dir:
        if dataset == 'COCO':
            json.dump(det_list, results_file)
        results_file.close()
def train():
    """Train RefineDet (ARM + ODM branches) on the module-level ``dataset``.

    Reads ``args``, ``cfg``, ``dataset`` and ``val_dataset`` from module
    scope; checkpoints every 5000 iterations and saves a final model.

    NOTE(review): this function uses pre-0.4 PyTorch idioms
    (``Variable(..., volatile=True)``, ``loss.data[0]``); on modern PyTorch
    these need ``torch.no_grad()`` / ``loss.item()`` — left unchanged here.
    """
    # network set-up
    ssd_net = build_refine('train',
                           cfg['min_dim'],
                           cfg['num_classes'],
                           use_refine=True,
                           use_tcb=True)
    net = ssd_net

    if args.cuda:
        net = torch.nn.DataParallel(
            ssd_net)  # state_dict will have .module. prefix
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        print('Using preloaded base network...')  # Preloaded.
        print('Initializing other weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.trans_layers.apply(weights_init)
        ssd_net.latent_layrs.apply(weights_init)
        ssd_net.up_layers.apply(weights_init)
        ssd_net.arm_loc.apply(weights_init)
        ssd_net.arm_conf.apply(weights_init)
        ssd_net.odm_loc.apply(weights_init)
        ssd_net.odm_conf.apply(weights_init)

    if args.cuda:
        net = net.cuda()

    # otimizer and loss set-up
    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    # ARM is class-agnostic (2 classes: object vs background).
    arm_criterion = RefineMultiBoxLoss(2, 0.5, True, 0, True, 3, 0.5, False, 0,
                                       args.cuda)
    odm_criterion = RefineMultiBoxLoss(
        cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, False, 0.01,
        args.cuda)  # 0.01 -> 0.99 negative confidence threshold

    # different from normal ssd, where the PriorBox is stored inside SSD object
    priorbox = PriorBox(cfg)
    priors = Variable(priorbox.forward(), volatile=True)
    # detector used in test_net for testing
    detector = RefineDetect(cfg['num_classes'], 0, cfg, object_score=0.01)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size
    print('Training refineDet on:', dataset.name)
    print('Using the specified args:')
    print(args)

    if args.visdom:
        import visdom
        viz = visdom.Visdom()
        # initialize visdom loss plot
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    # adjust learning rate based on epoch
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    step_index = 0

    # training data loader
    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    #    batch_iterator = None
    # Running sums over the last 10 iterations, reported then reset below.
    mean_odm_loss_c = 0
    mean_odm_loss_l = 0
    mean_arm_loss_c = 0
    mean_arm_loss_l = 0
    # max_iter = cfg['max_epoch'] * epoch_size
    for iteration in range(args.start_iter,
                           cfg['max_epoch'] * epoch_size + 10):
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(
                data_loader)  # the dataloader cannot re-initilize
            images, targets = next(batch_iterator)

        if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
            # update visdom loss plot
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0

        if iteration != 0 and (iteration % epoch_size == 0):
            #        adjust_learning_rate(optimizer, args.gamma, epoch)
            # evaluation
            if args.evaluate == True:
                # load net
                net.eval()
                APs, mAP = test_net(args.eval_folder,
                                    net,
                                    detector,
                                    priors,
                                    args.cuda,
                                    val_dataset,
                                    BaseTransform(net.module.size,
                                                  cfg['testset_mean']),
                                    args.max_per_image,
                                    thresh=args.confidence_threshold
                                    )  # 320 originally for cfg['min_dim']
                net.train()
            epoch += 1

        # update learning rate
        if iteration in stepvalues:
            step_index = stepvalues.index(iteration) + 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)

        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(ann.cuda(), volatile=True) for ann in targets]
        else:
            images = Variable(images)
            targets = [Variable(ann, volatile=True) for ann in targets]
        # forward
        t0 = time.time()
        out = net(images)
        arm_loc, arm_conf, odm_loc, odm_conf = out
        # backprop
        optimizer.zero_grad()
        #arm branch loss
        #priors = priors.type(type(images.data)) #convert to same datatype
        arm_loss_l, arm_loss_c = arm_criterion((arm_loc, arm_conf), priors,
                                               targets)
        #odm branch loss
        odm_loss_l, odm_loss_c = odm_criterion(
            (odm_loc, odm_conf), priors, targets, (arm_loc, arm_conf), False)

        mean_arm_loss_c += arm_loss_c.data[0]
        mean_arm_loss_l += arm_loss_l.data[0]
        mean_odm_loss_c += odm_loss_c.data[0]
        mean_odm_loss_l += odm_loss_l.data[0]

        # Joint objective: localization + classification for both branches.
        loss = arm_loss_l + arm_loss_c + odm_loss_l + odm_loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()

        if iteration % 10 == 0:
            print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Total iter ' + repr(iteration) +
                  ' || AL: %.4f AC: %.4f OL: %.4f OC: %.4f||' %
                  (mean_arm_loss_l / 10, mean_arm_loss_c / 10,
                   mean_odm_loss_l / 10, mean_odm_loss_c / 10) +
                  'Timer: %.4f sec. ||' % (t1 - t0) + 'Loss: %.4f ||' %
                  (loss.data[0]) + 'LR: %.8f' % (lr))

            mean_odm_loss_c = 0
            mean_odm_loss_l = 0
            mean_arm_loss_c = 0
            mean_arm_loss_l = 0


#        if args.visdom:
#            update_vis_plot(iteration, loss_l.data[0], loss_c.data[0],
#                            iter_plot, epoch_plot, 'append')

        if iteration != 0 and iteration % 5000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(ssd_net.state_dict(),
                       'weights/ssd300_refineDet_' + repr(iteration) + '.pth')

    torch.save(ssd_net.state_dict(),
               args.save_folder + '' + args.dataset + '.pth')
Exemple #19
0
def get_prior():
    """Build and return the prior (default) boxes for the configured size.

    Selects the VOC 300 or 512 config based on ``args.size`` and computes
    the priors without gradient tracking.

    Returns:
        torch.Tensor: the prior boxes produced by ``PriorBox.forward()``.
    """
    cfg = (VOC_300, VOC_512)[args.size == '512']
    priorbox = PriorBox(cfg)
    # torch.no_grad() replaces the removed `volatile=True` Variable flag;
    # priors are constants and never need gradients.
    with torch.no_grad():
        priors = priorbox.forward()
    return priors
class EfficientDet(nn.Module):
    """EfficientNet backbone + BiFPN fusion with SSD-style prediction heads.

    Produces ``(bbox_regressions, class_scores, priors)`` in the layout the
    surrounding SSD-style training pipeline expects.

    Args:
        num_class: number of detection classes (21 selects the VOC config).
        levels: number of BiFPN levels (kept for interface compatibility).
        num_channels: channel width of the BiFPN outputs and head inputs.
        model_name: pretrained EfficientNet variant to load as backbone.
    """

    def __init__(self,
                 num_class=21,
                 levels=3,
                 num_channels=128,
                 model_name='efficientnet-b0'):
        super(EfficientDet, self).__init__()
        self.num_class = num_class
        self.levels = levels
        self.num_channels = num_channels
        self.efficientnet = EfficientNet.from_pretrained(model_name)

        self.cfg = (coco, voc)[num_class == 21]
        self.priorbox = PriorBox(self.cfg)
        # Priors are constants; torch.no_grad() replaces the removed
        # `volatile=True` Variable flag.
        with torch.no_grad():
            self.priors = self.priorbox.forward()
        self.num_anchor = 9
        # Bug fix: use nn.ModuleList instead of plain Python lists so the
        # head parameters are registered with the module — otherwise
        # .parameters(), .to(device) and state_dict() silently skip them.
        self.class_module = nn.ModuleList()
        self.regress_module = nn.ModuleList()
        for _ in range(3, 8):  # one head per pyramid level P3..P7
            self.class_module.append(
                nn.Sequential(
                    nn.Conv2d(in_channels=self.num_channels,
                              out_channels=64,
                              kernel_size=2,
                              stride=1),
                    nn.Conv2d(in_channels=64,
                              out_channels=self.num_anchor * num_class,
                              kernel_size=2,
                              stride=1)))
            self.regress_module.append(
                nn.Sequential(
                    nn.Conv2d(in_channels=self.num_channels,
                              out_channels=64,
                              kernel_size=2,
                              stride=1),
                    nn.Conv2d(in_channels=64,
                              out_channels=self.num_anchor * 4,
                              kernel_size=2,
                              stride=1)))
        # Bug fix: construct the shared BiFPN and activation once — the
        # original rebuilt (and re-initialized) them on every loop pass.
        self.BIFPN = BIFPN(in_channels=[40, 80, 112, 192, 320],
                           out_channels=self.num_channels,
                           num_outs=5)
        self.sigmoid = nn.Sigmoid()

    def forward(self, inputs):
        """Return ``(loc, conf, priors)`` predictions for a batch of images.

        Args:
            inputs: image batch tensor fed to the EfficientNet backbone.
        """
        P1, P2, P3, P4, P5, P6, P7 = self.efficientnet(inputs)
        P3, P4, P5, P6, P7 = self.BIFPN([P3, P4, P5, P6, P7])
        feature_classes = []
        feature_bboxes = []
        for i, p in enumerate([P3, P4, P5, P6, P7]):
            feature_class = self.class_module[i](p)
            feature_class = feature_class.view(-1, self.num_class)
            feature_class = self.sigmoid(feature_class)
            feature_classes.append(feature_class)

            feature_bbox = self.regress_module[i](p)
            feature_bbox = feature_bbox.view(-1, 4)
            feature_bbox = self.sigmoid(feature_bbox)
            feature_bboxes.append(feature_bbox)
        feature_classes = torch.cat(feature_classes, dim=0)
        feature_bboxes = torch.cat(feature_bboxes, dim=0)

        output = (feature_bboxes.view(inputs.size(0), -1, 4),
                  feature_classes.view(inputs.size(0), -1,
                                       self.num_class), self.priors)
        return output
class SSD(nn.Module):
    """Single Shot Multibox Architecture
    The network is composed of a base VGG network followed by the
    added multibox conv layers.  Each multibox layer branches into
        1) conv2d for class conf scores
        2) conv2d for localization predictions
        3) associated priorbox layer to produce default bounding
           boxes specific to the layer's feature map size.
    See: https://arxiv.org/pdf/1512.02325.pdf for more details.

    Args:
        phase: (string) Can be "test" or "train"
        size: input image size
        base: VGG16 layers for input, size of either 300 or 500
        extras: extra layers that feed to multibox loc and conf layers
        head: "multibox head" consists of loc and conf conv layers
    """
    def __init__(self, phase, size, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        # VOC config when num_classes == 21 (20 classes + background).
        self.cfg = (coco, voc)[num_classes == 21]
        self.priorbox = PriorBox(self.cfg)
        # Priors are constants; torch.no_grad() replaces the removed
        # `volatile=True` Variable flag.
        with torch.no_grad():
            self.priors = self.priorbox.forward()
        self.size = size

        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def forward(self, x):
        """Applies network layers and ops on input image(s) x.

        Args:
            x: input image or batch of images. Shape: [batch,3,300,300].

        Return:
            Depending on phase:
            test:
                Variable(tensor) of output class label predictions,
                confidence score, and corresponding location predictions for
                each object detected. Shape: [batch,topk,7]

            train:
                list of concat outputs from:
                    1: confidence layers, Shape: [batch*num_priors,num_classes]
                    2: localization layers, Shape: [batch,num_priors*4]
                    3: priorbox layers, Shape: [2,num_priors*4]
        """
        sources = list()
        loc = list()
        conf = list()

        # apply vgg up to conv4_3 relu
        for k in range(23):
            x = self.vgg[k](x)

        s = self.L2Norm(x)
        sources.append(s)

        # apply vgg up to fc7
        for k in range(23, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # apply extra layers and cache source layer outputs
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        # apply multibox head to source layers
        for (x, l, c) in zip(sources, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
        if self.phase == "test":
            output = self.detect(
                loc.view(loc.size(0), -1, 4),  # loc preds
                self.softmax(conf.view(conf.size(0), -1,
                                       self.num_classes)),  # conf preds
                self.priors.type(type(x.data))  # default boxes
            )
        else:
            output = (loc.view(loc.size(0), -1,
                               4), conf.view(conf.size(0), -1,
                                             self.num_classes), self.priors)
        return output

    def load_weights(self, base_file):
        """Load a serialized state dict from *base_file*, mapped to CPU."""
        other, ext = os.path.splitext(base_file)
        # Bug fix: the original `ext == '.pkl' or '.pth'` was always truthy
        # (the non-empty string '.pth' evaluates True), so the extension
        # check never rejected anything.
        if ext in ('.pkl', '.pth'):
            print('Loading weights into state dict...')
            self.load_state_dict(
                torch.load(base_file,
                           map_location=lambda storage, loc: storage))
            print('Finished!')
        else:
            print('Sorry only .pth and .pkl files supported.')
Exemple #22
0
if args.ngpu > 1:
    net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

if args.cuda:
    net.cuda()
    cudnn.benchmark = True


optimizer = optim.SGD(net.parameters(), lr=args.lr,
                      momentum=args.momentum, weight_decay=args.weight_decay)
#optimizer = optim.RMSprop(net.parameters(), lr=args.lr,alpha = 0.9, eps=1e-08,
#                      momentum=args.momentum, weight_decay=args.weight_decay)

criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)
priorbox = PriorBox(cfg)
priors = Variable(priorbox.forward(), volatile=True)


def train():
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')

    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets, preproc(
            img_dim, rgb_means, p), AnnotationTransform())
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets, preproc(
Exemple #23
0
def train():
    """Train an RFB-Net variant on VOC or COCO.

    Reads all configuration from module-level ``args``; saves periodic
    epoch checkpoints and a final model under ``args.save_folder``.
    """
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    # Dataset-specific image sets and prior-box configs.
    if args.dataset == 'VOC':
        train_sets = [('2007', 'trainval'), ('2012', 'trainval')]
        cfg = (VOC_300, VOC_512)[args.size == '512']
    else:
        train_sets = [('2014', 'train'), ('2014', 'valminusminival')]
        cfg = (COCO_300, COCO_512)[args.size == '512']

    # Select the network builder for the requested variant.
    if args.version == 'RFB_vgg':
        from models.RFB_Net_vgg import build_net
    elif args.version == 'RFB_E_vgg':
        from models.RFB_Net_E_vgg import build_net
    elif args.version == 'RFB_d2':
        from models.RFB_Net_vgg_d2 import build_net
    elif args.version == 'RFB_d3':
        from models.RFB_Net_vgg_d3 import build_net
    elif args.version == 'RFB_d4':
        from models.RFB_Net_vgg_d4 import build_net
    elif args.version == 'RFB_d4_fpn':
        from models.RFB_Net_vgg_d4_fpn import build_net
    elif args.version == 'RFB_mobile':
        from models.RFB_Net_mobile import build_net
        cfg = COCO_mobile_300
    else:
        # Bug fix: bail out instead of falling through to a NameError on
        # the never-imported build_net below.
        print('Unknown version!')
        return
    logging.info('build model version: {}'.format(args.version))

    img_dim = (300, 512)[args.size == '512']
    rgb_means = ((104, 117, 123), (103.94, 116.78,
                                   123.68))[args.version == 'RFB_mobile']
    p = (0.6, 0.2)[args.version == 'RFB_mobile']
    # 738:6 classes ; 2392:7 ; 8718:6
    num_classes = (21, 81)[args.dataset == 'COCO']
    logging.info('dataset number of classes: {}'.format(num_classes))
    batch_size = args.batch_size
    weight_decay = 0.0005
    gamma = 0.1
    momentum = 0.9

    net = build_net('train', img_dim, num_classes)
    # print(net)
    if args.resume_net is None:  # idiom fix: identity compare with None
        base_weights = torch.load(args.basenet)
        from collections import OrderedDict
        print('Loading base network...')
        net.base.load_state_dict(base_weights)

        def weights_init(m):
            # kaiming init for conv weights, 1/0 for batch-norm scale/bias.
            for key in m.state_dict():
                if key.split('.')[-1] == 'weight':
                    if 'conv' in key:
                        init.kaiming_normal_(m.state_dict()[key],
                                             mode='fan_out')
                    if 'bn' in key:
                        m.state_dict()[key][...] = 1
                elif key.split('.')[-1] == 'bias':
                    m.state_dict()[key][...] = 0

        print('Initializing weights...')
        # initialize newly added layers' weights with kaiming_normal method
        net.extras.apply(weights_init)
        net.loc.apply(weights_init)
        net.conf.apply(weights_init)
        net.Norm.apply(weights_init)
        if args.version == 'RFB_E_vgg':
            net.reduce.apply(weights_init)
            net.up_reduce.apply(weights_init)
    else:
        # load resume network
        print('Loading resume network...')
        state_dict = torch.load(args.resume_net)
        # create new OrderedDict that does not contain `module.`
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        net.load_state_dict(new_state_dict)

    if args.ngpu > 1:
        net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

    if args.cuda:
        net.cuda()
        cudnn.benchmark = True

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    # optimizer = optim.RMSprop(net.parameters(), lr=args.lr,alpha = 0.9, eps=1e-08,
    #                      momentum=args.momentum, weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)
    priorbox = PriorBox(cfg)
    # Priors are constants — build them once, outside the autograd graph.
    with torch.no_grad():
        priors = priorbox.forward()
        if args.cuda:
            priors = priors.cuda()

    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch

    logging.info('Loading Dataset: {}'.format(args.dataset))
    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets,
                               preproc(img_dim, rgb_means, p),
                               AnnotationTransform())
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets,
                                preproc(img_dim, rgb_means, p))
    else:
        print('Only VOC and COCO are supported now!')
        return

    epoch_size = len(dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size

    # LR decay milestones (in iterations), per dataset schedule.
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0

    lr = args.lr
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator (re-created each epoch for reshuffling)
            batch_iterator = iter(
                data.DataLoader(dataset,
                                batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            loc_loss = 0
            conf_loss = 0
            # Checkpoint every 10 epochs, then every 5 past epoch 200.
            if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0
                                                   and epoch > 200):
                torch.save(
                    net.state_dict(), args.save_folder + args.version + '_' +
                    args.dataset + '_epoches_' + repr(epoch) + '.pth')
            epoch += 1

        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)
        #print(np.sum([torch.sum(anno[:,-1] == 2) for anno in targets]))
        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda()) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        load_t1 = time.time()
        if iteration % 10 == 0:
            print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Total iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f||' % (loss_l.item(), loss_c.item()) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))
    torch.save(
        net.state_dict(),
        os.path.join(args.save_folder,
                     'Final_' + args.version + '_' + args.dataset + '.pth'))
Exemple #24
0
def test_net(save_folder, net, dataset, transform, top_k, detector, priors):
    """Test a Fast R-CNN network on an image database.

    Runs multi-scale, flip-augmented inference over every image in
    ``dataset``, merges per-scale detections with ``bbox_vote`` and dumps
    the results to ``detections.pkl`` before calling the evaluator.

    NOTE(review): relies on module-level globals (``args``, ``pkl_dir``,
    ``multi_scale``, ``multi_cfg``, ``ssd_dim``, ``dataset_mean``,
    ``labelmap``, ``device``, ``base_transform``) — confirm they are all
    defined by the importing script.
    """
    num_images = len(dataset)
    # all detections are collected into:score
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(len(labelmap) + 1)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    all_time = 0.
    output_dir = get_output_dir(
        pkl_dir,
        args.iteration + '_' + args.dataset_name + '_' + args.set_file_name)
    det_file = os.path.join(output_dir, 'detections.pkl')
    output_dir = get_output_dir(output_dir, 'multi_test')
    ######################### Multiscale PriorBox #####################
    # Pre-build one prior set per test scale so the per-image loop does
    # not have to regenerate them.
    priorboxes = {}
    for v1 in multi_scale[str(ssd_dim)]:
        if not multi_cfg[str(v1)]:
            # NOTE(review): returns a string instead of raising — callers
            # that ignore the return value will silently skip testing.
            return ("not included this multi_scale")
        priorbox = PriorBox(multi_cfg[str(v1)])
        img_size = multi_cfg[str(v1)]['min_dim']
        with torch.no_grad():
            priorboxes[str(img_size)] = priorbox.forward().to(device)
    ########################## Detection ##############################
    for i in range(num_images):
        _t['im_detect'].tic()
        image = dataset.pull_image(i)
        h, w, _ = image.shape
        detections_multi = {}
        for v in multi_scale[str(ssd_dim)]:
            # shadows the `priors` parameter on purpose: each scale uses
            # its own pre-built prior set
            priors = priorboxes[str(v)]
            ssd_dim_temp = int(v)
            # loop == 0: original image; loop == 1: horizontally flipped copy
            for loop in range(2):
                if (loop == 0):
                    im_trans = base_transform(image, ssd_dim_temp,
                                              dataset_mean)
                    im_trans = im_trans[:, :, (2, 1, 0)]
                else:
                    im_f = image.copy()
                    im_f = cv2.flip(im_f, 1)
                    im_trans = base_transform(im_f, ssd_dim_temp, dataset_mean)
                    im_trans = im_trans[:, :, (2, 1, 0)]
                with torch.no_grad():
                    # HWC -> NCHW tensor on the inference device
                    x = torch.from_numpy(im_trans).unsqueeze(0).permute(
                        0, 3, 1, 2).to(device)
                    if 'RefineDet' in args.backbone and args.refine:
                        # two-step RefineDet: ARM localization refines anchors
                        arm_loc, _, loc, conf = net(x)
                        detections = detector.forward(loc,
                                                      conf,
                                                      priors,
                                                      arm_loc_data=arm_loc)
                        detections_multi[str(ssd_dim) + '_' + str(v) + '_' +
                                         str(loop)] = detections.clone()
                    else:
                        loc, conf = net(x)
                        arm_loc = None
                        detections = detector.forward(loc,
                                                      conf,
                                                      priors,
                                                      arm_loc_data=arm_loc)
                        detections_multi[str(ssd_dim) + '_' + str(v) + '_' +
                                         str(loop)] = detections.clone()
        detect_time = _t['im_detect'].toc(average=False)
        # skip the first 10 images when averaging FPS (warm-up)
        if i > 10:
            all_time += detect_time
    ###################################################################
        # merge the per-scale/per-flip detections class by class
        for j in range(1, detections.size(1)):
            cls_dets = np.array([])
            for k, d in detections_multi.items():
                dets = d[0, j, :]
                if dets.sum() == 0:
                    continue
                # keep rows whose score (column 0) is positive
                mask = dets[:, 0].gt(0.).expand(dets.size(-1),
                                                dets.size(0)).t()
                dets = torch.masked_select(dets, mask).view(-1, dets.size(-1))
                boxes = dets[:, 1:-1] if dets.size(-1) == 6 else dets[:, 1:]
                if (k[-1] == '1'):
                    # undo the horizontal flip for the mirrored pass
                    boxes[:, 0] = 1 - boxes[:, 0]
                    boxes[:, 2] = 1 - boxes[:, 2]
                    temp_swap = boxes[:, 0].clone()
                    boxes[:, 0] = boxes[:, 2]
                    boxes[:, 2] = temp_swap
                # normalized coords -> pixel coords
                boxes[:, 0] *= w
                boxes[:, 2] *= w
                boxes[:, 1] *= h
                boxes[:, 3] *= h
                # per-scale size gates: small test scales keep only large
                # boxes, large test scales keep only small boxes
                if k in ['320_192_0', '320_192_1', '512_320_0', '512_320_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.maximum(boxes_np[:, 2] - boxes_np[:, 0] +
                                   1, boxes_np[:, 3] - boxes_np[:, 1] +
                                   1) > 32)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_320_0', '320_320_1', '512_512_0', '512_512_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.maximum(boxes_np[:, 2] - boxes_np[:, 0] +
                                   1, boxes_np[:, 3] - boxes_np[:, 1] +
                                   1) > 0)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_384_0', '320_384_1', '512_640_0', '512_640_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] +
                                   1, boxes_np[:, 3] - boxes_np[:, 1] +
                                   1) < 160)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_448_0', '320_448_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] +
                                   1, boxes_np[:, 3] - boxes_np[:, 1] +
                                   1) < 128)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_512_0', '320_512_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] +
                                   1, boxes_np[:, 3] - boxes_np[:, 1] +
                                   1) < 96)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in ['320_576_0', '320_576_1']:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] +
                                   1, boxes_np[:, 3] - boxes_np[:, 1] +
                                   1) < 64)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                elif k in [
                        '320_706_0', '320_706_1', '512_1216_0', '512_1216_1'
                ]:
                    boxes_np = boxes.cpu().numpy()
                    index_temp = np.where(
                        np.minimum(boxes_np[:, 2] - boxes_np[:, 0] +
                                   1, boxes_np[:, 3] - boxes_np[:, 1] +
                                   1) < 32)[0]
                    if (not index_temp.size):
                        continue
                    else:
                        boxes = boxes[index_temp, :]
                # NOTE(review): if `k` matched none of the branches above,
                # `index_temp` is reused (or undefined) from a previous key
                # — confirm the key space is fully covered by the branches.
                if (index_temp.size == 0):
                    continue
                scores = dets[index_temp, 0].cpu().numpy()
                # assemble (x1, y1, x2, y2, score) rows for this scale
                cls_dets_temp = np.hstack((boxes.cpu().numpy(), scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                if (cls_dets.size == 0):
                    cls_dets = cls_dets_temp.copy()
                else:
                    cls_dets = np.concatenate((cls_dets, cls_dets_temp),
                                              axis=0)
            if (cls_dets.size != 0):
                # fuse overlapping multi-scale detections by weighted voting
                cls_dets = bbox_vote(cls_dets)
                if (len(cls_dets) != 0):
                    all_boxes[j][i] = cls_dets
        print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_images,
                                                    detect_time))
    FPS = (num_images - 10) / all_time
    print('FPS:', FPS)
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    evaluate_detections(all_boxes, output_dir, dataset, FPS=FPS)
Exemple #25
0
def test(img_path, model_path='weights/RFB_vgg_COCO_30.3.pth'):
    """Run single-image RFB-SSD inference and display the detections.

    Args:
        img_path: path of the image to run detection on.
        model_path: checkpoint to load; a path containing ``mobile``
            selects the MobileNet backbone/config, otherwise the VGG one.
    """
    # BUG FIX: `cuda` was only assigned when CUDA was available, which
    # raised UnboundLocalError on CPU-only machines.
    cuda = torch.cuda.is_available()
    if 'mobile' in model_path:
        cfg = COCO_mobile_300
    else:
        cfg = COCO_300

    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
        if cuda:
            priors = priors.cuda()
    numclass = 81  # COCO: 80 object classes + background

    img = cv2.imread(img_path)
    # scale factors mapping normalized boxes back to pixel coordinates
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    if 'mobile' in model_path:
        net = build_rfb_mobilenet('test', 300, numclass)  # initialize detector
    else:
        net = build_rfb_vgg_net('test', 300, numclass)  # initialize detector

    transform = BaseTransform(net.size, (123, 117, 104), (2, 0, 1))
    with torch.no_grad():
        x = transform(img).unsqueeze(0)
        if cuda:
            x = x.cuda()
            scale = scale.cuda()
    state_dict = torch.load(model_path)['state_dict']
    # strip the optional DataParallel 'module.' prefix from checkpoint keys
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:] if k.startswith('module.') else k
        new_state_dict[name] = v
    net.load_state_dict(new_state_dict)
    net.eval()
    if cuda:
        net = net.cuda()
        cudnn.benchmark = True
    else:
        net = net.cpu()
    print('Finished loading model!')
    detector = Detect(numclass, 0, cfg)

    tic = time.time()
    out = net(x)  # forward pass

    boxes, scores = detector.forward(out, priors)
    print('Finished in {}'.format(time.time() - tic))
    boxes = boxes[0]
    scores = scores[0]
    boxes *= scale  # back to pixel coordinates
    boxes = boxes.cpu().numpy()
    scores = scores.cpu().numpy()
    # per-class thresholding + NMS, then draw the surviving boxes
    for j in range(1, numclass):
        inds = np.where(scores[:, j] > 0.6)[0]  # conf > 0.6
        # BUG FIX: np.where never returns None; skip when no detection
        # passes the confidence threshold.
        if inds.size == 0:
            continue
        c_bboxes = boxes[inds]
        c_scores = scores[inds, j]
        c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
            np.float32, copy=False)
        keep = nms(c_dets, 0.6)
        c_dets = c_dets[keep, :]
        c_bboxes = c_dets[:, :4]

        if c_bboxes.shape[0] != 0:
            print('{}: {}'.format(j, c_bboxes))
            for box in c_bboxes:
                # OpenCV drawing APIs require integer pixel coordinates.
                x1, y1, x2, y2 = (int(round(float(v))) for v in box)
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 1, 0)
                cv2.putText(img, '{}'.format(j), (x1, y1),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2,
                            cv2.LINE_AA)
    cv2.imshow('rr', img)
    cv2.waitKey(0)
Exemple #26
0
def handler(context):
    """ABEJA training entry point: train an SSD model on API-hosted datasets.

    Pulls the 'trainval' and 'test' datasets from the platform, downloads
    VGG16 base weights, and runs the training loop for ``max_iter``
    iterations, logging every 100 and evaluating every 10000 iterations.

    NOTE(review): depends on module-level globals (``min_dim``, ``MEANS``,
    ``PARAMS``, ``num_classes``, ``lr``, ``batch_size``, ``max_iter``,
    ``lr_steps``, ``device``, ``writer``, ``ABEJA_TRAINING_RESULT_DIR``) —
    confirm they are defined at import time.
    """
    dataset_alias = context.datasets

    trainval_dataset_id = dataset_alias['trainval']
    test_dataset_id = dataset_alias['test']

    trainval_dataset = list(load_dataset_from_api(trainval_dataset_id))
    test_dataset = list(load_dataset_from_api(test_dataset_id))

    trainval = DetectionDatasetFromAPI(trainval_dataset,
                                       transform=SSDAugmentation(
                                           min_dim, MEANS))
    test = DetectionDatasetFromAPI(test_dataset,
                                   transform=SSDAugmentation(min_dim, MEANS))
    train_dataset = trainval
    test_dataset = test

    # fixed anchor (prior) boxes: no gradient tracking needed
    priorbox = PriorBox(min_dim, PARAMS)
    with torch.no_grad():
        priors = priorbox.forward().to(device)

    ssd_net = build_ssd('train', priors, min_dim, num_classes)
    ssd_net = ssd_net.to(device)

    # download the reduced-FC VGG16 backbone weights used by SSD
    url = 'https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth'
    weight_file = os.path.join(ABEJA_TRAINING_RESULT_DIR,
                               'vgg16_reducedfc.pth')
    download(url, weight_file)

    vgg_weights = torch.load(weight_file)
    print('Loading base network...')
    ssd_net.vgg.load_state_dict(vgg_weights)

    optimizer = optim.SGD(ssd_net.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=5e-4)
    criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False,
                             PARAMS['variance'], device)

    # loss counters
    step_index = 0

    trainloader = data.DataLoader(train_dataset,
                                  batch_size,
                                  num_workers=0,
                                  shuffle=True,
                                  collate_fn=tools.detection_collate,
                                  pin_memory=True)

    testloader = data.DataLoader(test_dataset,
                                 batch_size,
                                 num_workers=0,
                                 shuffle=False,
                                 collate_fn=tools.detection_collate,
                                 pin_memory=True)

    # create batch iterator
    iteration = 1
    while iteration <= max_iter:
        ssd_net.train()
        for images, targets in trainloader:
            if iteration > max_iter:
                break

            # step the learning rate at the scheduled iterations
            if iteration in lr_steps:
                step_index += 1
                adjust_learning_rate(optimizer, 0.1, step_index)

            # load train data
            images = images.to(device)
            targets = [ann.to(device) for ann in targets]

            # forward
            out = ssd_net(images)

            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()

            if iteration % 100 == 0:
                print('[Train] iter {}, loss: {:.4f}'.format(
                    iteration, loss.item()))
                statistics(iteration, loss.item(), None, None, None)
                writer.add_scalar('main/loss', loss.item(), iteration)
                writer.add_scalar('main/loc_loss', loss_l.item(), iteration)
                writer.add_scalar('main/conf_loss', loss_c.item(), iteration)

            if iteration % 10000 == 0:
                # NOTE: `eval` here is a project-defined evaluation routine
                # that shadows the builtin within this module's namespace.
                eval(testloader, ssd_net, criterion, iteration)
                ssd_net.train()  # restore train mode after evaluation

            iteration += 1
    torch.save(ssd_net.state_dict(),
               os.path.join(ABEJA_TRAINING_RESULT_DIR, 'model.pth'))
Exemple #27
0
class Trainer(object):
    """SSD-style training driver.

    Bundles a model, SGD optimizer, MultiBox loss and checkpoint handling.
    Required ``kwargs``: ``resume_from`` (checkpoint filename) and
    ``checkpoint_dir``; optional ``pretrained_path``.
    """

    def __init__(self, model, cfg, train_loader, val_loader, save_epochs,
                 **kwargs):
        self.kwargs = kwargs
        self.cfg = cfg

        self.save_epochs = save_epochs
        # Single source of truth for device placement. BUG FIX: several
        # methods previously referenced a module-level `device` instead
        # of this attribute.
        self.device = torch.device(
            "cuda") if torch.cuda.is_available() else torch.device("cpu")
        self.model = model.to(self.device)
        self.train_loader = train_loader
        assert isinstance(
            self.train_loader,
            DataLoader), 'train_loader must be DataLoader instance.'

        self.num_classes = self.train_loader.dataset.classes
        self.val_loader = val_loader

        self.start_epoch = 0

        self.loss = None
        self.optimizer = None
        self._create_optimization()

        self.resume_from = self.kwargs['resume_from']
        self.checkpoint_dir = self.kwargs['checkpoint_dir']
        self.load_pretrained_model()
        self.load_checkpoint(self.kwargs['resume_from'])

    def train(self, epochs=1000):
        """Run the training loop, checkpointing every ``save_epochs`` epochs."""
        print('Start to train...')
        # BUG FIX: define `e` before the loop so the KeyboardInterrupt
        # handler cannot hit an UnboundLocalError when interrupted early.
        e = self.start_epoch
        try:
            for e in range(self.start_epoch, epochs):
                for i, (data, target) in enumerate(self.train_loader, start=1):
                    # BUG FIX: use self.device (was a module-level `device`)
                    images = data.to(self.device)
                    targets = [anno.to(self.device) for anno in target]

                    out = self.model(images)

                    try:
                        self.optimizer.zero_grad()
                        loss_l, loss_c = self.criterion(
                            out, self.priors, targets)
                        loss = loss_l + loss_c
                        loss.backward()
                        self.optimizer.step()
                        if i % 10 == 0:
                            print(
                                'Epoch: {}, iter: {}, loc_loss: {}, cls_loss: {}'
                                .format(e, i, loss_l, loss_c))
                    except Exception as _e:
                        # deliberate best-effort: skip batches whose loss
                        # computation fails instead of aborting training
                        print('Got loss error in train: {}'.format(_e))
                        print('continue....')
                        continue

                if e % self.save_epochs == 0:
                    print('Saving checkpoints at epoch: {}'.format(e))
                    self.save_checkpoint(
                        {
                            'epoch': e + 1,
                            'state_dict': self.model.state_dict(),
                            'optimizer': self.optimizer.state_dict(),
                        },
                        is_best=False)

        except KeyboardInterrupt:
            print('Interrupted, saving checkpoints at epoch: {}'.format(e))
            self.save_checkpoint(
                {
                    'epoch': e + 1,
                    'state_dict': self.model.state_dict(),
                    'optimizer': self.optimizer.state_dict(),
                },
                is_best=False)

    def save_checkpoint(self, state, is_best):
        """Write ``state`` to the checkpoint dir; copy to a 'best' file if asked."""
        torch.save(
            state, os.path.join(self.kwargs['checkpoint_dir'],
                                self.resume_from))
        if is_best:
            shutil.copyfile(
                os.path.join(self.kwargs['checkpoint_dir'], self.resume_from),
                os.path.join(self.kwargs['checkpoint_dir'],
                             'final_best_{}'.format(self.resume_from)))

    def _create_optimization(self):
        """Build the SGD optimizer, MultiBox criterion and fixed prior boxes."""
        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=4e-3,
                                   weight_decay=0,
                                   momentum=0)
        # BUG FIX: criterion/priors previously targeted a module-level
        # `device`; keep everything on self.device instead.
        self.criterion = MultiBoxLoss(self.num_classes, 0.5, True, 0, True, 3,
                                      0.5, False).to(self.device)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = self.priorbox.forward().to(self.device)

    def load_pretrained_model(self):
        """Optionally load full-model weights from kwargs['pretrained_path']."""
        if 'pretrained_path' in self.kwargs.keys():
            print('Loading pretrained weights...')
            pretrained_dict = torch.load(self.kwargs['pretrained_path'])
            self.model.load_state_dict(pretrained_dict)
            print('Pretrained model load successful.')
        else:
            print('No pretrained path provide, skip this step.')

    def load_checkpoint(self, filename):
        """Resume model/optimizer/epoch state from a checkpoint if it exists."""
        if not os.path.exists(self.kwargs['checkpoint_dir']):
            os.makedirs(self.kwargs['checkpoint_dir'])
        else:
            filename = os.path.join(self.kwargs['checkpoint_dir'], filename)
            if os.path.exists(filename) and os.path.isfile(filename):
                print('Loading checkpoint {}'.format(filename))
                checkpoint = torch.load(filename)
                self.start_epoch = checkpoint['epoch']
                self.model.load_state_dict(checkpoint['state_dict'])
                self.optimizer.load_state_dict(checkpoint['optimizer'])
                print(
                    'checkpoint loaded successful from {} at epoch {}'.format(
                        filename, self.start_epoch))
            else:
                print('No checkpoint exists from {}, skip load checkpoint...'.
                      format(filename))
Exemple #28
0
# CLI + model setup for pruning/testing a RefineDet model.
parser = argparse.ArgumentParser()
parser.add_argument("--prune_folder", default="prunes/")
parser.add_argument("--trained_model", default="prunes/refineDet_trained.pth")
parser.add_argument('--dataset_root', default=VOC_ROOT)
parser.add_argument("--cut_ratio", default=0.2, type=float)
parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda to train model')
#for test_net: 200 in SSD paper, 200 for COCO, 300 for VOC
parser.add_argument('--max_per_image', default=200, type=int,
                    help='Top number of detections kept per image, further restrict the number of predictions to parse')
args = parser.parse_args()

cfg = voc320

# different from normal ssd, where the PriorBox is stored inside SSD object
priorbox = PriorBox(cfg)
# `volatile=True` was removed in PyTorch 0.4; torch.no_grad() is the
# supported way to build the fixed priors without autograd tracking.
with torch.no_grad():
    priors = priorbox.forward().cuda()  # set the priors to cuda
detector = RefineDetect(cfg['num_classes'], 0, cfg, object_score=0.01)

def test_net(save_folder, net, detector, priors, cuda,
             testset, transform, max_per_image=200, thresh=0.05): # max_per_image is same as top_k
    """Run detection over ``testset`` and collect per-class results.

    NOTE(review): the body appears truncated in this file — only the
    output-folder setup and the ``all_boxes`` allocation are visible.
    """
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)

    num_images = len(testset)
    num_classes = len(labelmap)                      # +1 for background
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
# if not os.path.exists(args.save_folder):
#     os.mkdir(args.save_folder)

# Select the dataset config by dataset name and input size.
VOC_dataset_map = {'300': VOC_300, '320': VOC_320, '512': VOC_512}
if args.dataset == 'VOC':
    cfg = VOC_dataset_map[args.size]
else:
    cfg = (COCO_320, COCO_512)[args.size == '512']

if args.version == 'ATiny_pelee':
    from models.ATiny_pelee import build_net
else:
    print('Unknown version!')  # typo fixed: was 'Unkown'

priorbox = PriorBox(cfg)
# `volatile=True` was removed in PyTorch 0.4; torch.no_grad() ensures no
# gradients are tracked for the fixed priors.
with torch.no_grad():
    priors = priorbox.forward()
priors = priors.cpu()

def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline.

    Args:
        dets: (N, 5) array of ``(x1, y1, x2, y2, score)`` rows.
        thresh: IoU threshold; boxes overlapping a kept box by more than
            this are suppressed.

    Returns:
        List of indices into ``dets`` that survive suppression, ordered
        by descending score.

    NOTE(review): the body was truncated in this file; completed with the
    canonical greedy NMS implementation.
    """
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    # +1 follows the inclusive-pixel convention used elsewhere in this file
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  #descending order

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the top-scoring box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        # keep only boxes whose IoU with the chosen box is <= thresh
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]  # +1 offsets past the chosen box
    return keep
Exemple #30
0
                    type=str, help='Trained state_dict file path to open')
# NOTE(review): argparse `type=bool` treats any non-empty string as True
# ('--cuda False' still yields True) — consider a str2bool converter.
parser.add_argument('--cuda', default=True, type=bool,
                    help='Use cuda to train model')
parser.add_argument('--cpu', default=False, type=bool,
                    help='Use cpu nms')
args = parser.parse_args()


# Inference configuration: 2-class (background + object) VOC-style model.
cfg = VOC_Config
img_dim = 300  # input resolution expected by the network
num_classes = 2
rgb_means = (104, 117, 123)  # per-channel means subtracted at preprocess

# Build the fixed prior (anchor) boxes once at import time; no gradients
# are needed for these.
priorbox = PriorBox(cfg)
with torch.no_grad():
    priors = priorbox.forward()
    if args.cuda:
        priors = priors.cuda()


class ObjectDetector:
    """Thin inference wrapper bundling a network, a decoder and a transform."""

    def __init__(self, net, detection, transform, num_classes=2, thresh=0.1, cuda=True):
        self.net = net                # trained detection network
        self.detection = detection    # decoder for raw network output
        self.transform = transform    # preprocessing applied to input images
        self.num_classes = num_classes
        self.thresh = thresh          # confidence threshold for detections
        self.cuda = cuda              # whether to run inference on GPU

    def predict(self, img):
        # NOTE(review): body appears truncated in this file — only the
        # timer setup is visible here.
        _t = {'im_detect': Timer(), 'misc': Timer()}
Exemple #31
0
class BlazeFace(nn.Module):
    """Constructs a BlazeFace model

    the original paper
    https://sites.google.com/view/perception-cv4arvr/blazeface
    """

    def __init__(self, phase, num_classes):
        super(BlazeFace, self).__init__()
        self.phase = phase  # 'train' or 'test'
        self.num_classes = num_classes
        self.conv_1 = nn.Conv2d(3, 24, kernel_size=3, stride=2, padding=1, bias=True)
        self.bn_1 = nn.BatchNorm2d(24)
        self.relu = nn.ReLU(inplace=True)
        self.blaze_1 = BlazeBlock(24, 24)
        self.blaze_2 = BlazeBlock(24, 24)
        self.blaze_3 = BlazeBlock(24, 48, stride=2)
        self.blaze_4 = BlazeBlock(48, 48)
        self.blaze_5 = BlazeBlock(48, 48)
        self.blaze_6 = BlazeBlock(48, 24, 96, stride=2)
        self.blaze_7 = BlazeBlock(96, 24, 96)
        self.blaze_8 = BlazeBlock(96, 24, 96)
        self.blaze_9 = BlazeBlock(96, 24, 96, stride=2)
        self.blaze_10 = BlazeBlock(96, 24, 96)
        self.blaze_11 = BlazeBlock(96, 24, 96)

        self.apply(initialize)

        # loc/conf heads for the two feature maps fed to detection
        self.head = mbox([self.blaze_9, self.blaze_10], [2, 6], 2)

        self.loc = nn.ModuleList(self.head[0])
        self.conf = nn.ModuleList(self.head[1])

        self.cfg = wider_face
        self.priorbox = PriorBox(self.cfg)
        # `volatile=True` was removed in PyTorch 0.4; build the fixed
        # priors under no_grad so they carry no autograd history.
        with torch.no_grad():
            self.priors = self.priorbox.forward()

        if phase == 'test':
            # NOTE(review): self.softmax is defined but `conf` is passed
            # raw to self.detect below — confirm Detect applies softmax
            # internally.
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def forward(self, x):
        """Run the backbone and detection heads.

        Returns Detect output in 'test' phase, otherwise the
        ``(loc, conf, priors)`` triple used by the MultiBox loss.

        @todo: need to cache outputs from each detection layer, not just
        the final output; the mbox second argument may be wrong (based on
        SSD) — correct priorbox counts for multibox are still unknown.
        """
        h = self.conv_1(x)
        h = self.bn_1(h)
        h = self.relu(h)
        h = self.blaze_1(h)
        h = self.blaze_2(h)
        h = self.blaze_3(h)
        h = self.blaze_4(h)
        h = self.blaze_5(h)
        h = self.blaze_6(h)
        h = self.blaze_7(h)
        h1 = self.blaze_8(h)  # first feature map fed to the heads
        h = self.blaze_9(h1)

        h = self.blaze_10(h)
        h2 = self.blaze_11(h)  # second feature map fed to the heads

        # apply each head and flatten to (batch, -1); debug prints removed
        loc = list()
        conf = list()
        for (x, l, c) in zip([h1, h2], self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())
        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
        if self.phase == "test":
            output = self.detect(
                loc.view(loc.size(0), -1, 4),  # loc preds
                conf.view(conf.size(0), -1, self.num_classes),  # conf preds
                self.priors  # default boxes
            )
        else:
            output = (
                loc.view(loc.size(0), -1, 4),
                conf.view(conf.size(0), -1, self.num_classes),
                self.priors
            )
        return output